sqlglot.parser
1from __future__ import annotations 2 3import itertools 4import logging 5import re 6import typing as t 7from collections import defaultdict 8 9from sqlglot import exp 10from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 11from sqlglot.helper import apply_index_offset, ensure_list, seq_get 12from sqlglot.time import format_time 13from sqlglot.tokens import Token, Tokenizer, TokenType 14from sqlglot.trie import TrieResult, in_trie, new_trie 15 16if t.TYPE_CHECKING: 17 from sqlglot._typing import E, Lit 18 from sqlglot.dialects.dialect import Dialect, DialectType 19 20 T = t.TypeVar("T") 21 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 22 23logger = logging.getLogger("sqlglot") 24 25OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 26 27# Used to detect alphabetical characters and +/- in timestamp literals 28TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") 29 30 31def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 32 if len(args) == 1 and args[0].is_star: 33 return exp.StarMap(this=args[0]) 34 35 keys = [] 36 values = [] 37 for i in range(0, len(args), 2): 38 keys.append(args[i]) 39 values.append(args[i + 1]) 40 41 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 42 43 44def build_like(args: t.List) -> exp.Escape | exp.Like: 45 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 46 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 47 48 49def binary_range_parser( 50 expr_type: t.Type[exp.Expression], reverse_args: bool = False 51) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 52 def _parse_binary_range( 53 self: Parser, this: t.Optional[exp.Expression] 54 ) -> t.Optional[exp.Expression]: 55 expression = self._parse_bitwise() 56 if reverse_args: 57 this, expression = expression, this 58 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 59 60 return _parse_binary_range 61 62 63def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 64 # Default argument order is base, expression 65 this = seq_get(args, 0) 66 expression = seq_get(args, 1) 67 68 if expression: 69 if not dialect.LOG_BASE_FIRST: 70 this, expression = expression, this 71 return exp.Log(this=this, expression=expression) 72 73 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 74 75 76def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 79 80 81def build_lower(args: t.List) -> exp.Lower | exp.Hex: 82 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 85 86 87def build_upper(args: t.List) -> exp.Upper | exp.Hex: 88 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 89 arg = seq_get(args, 0) 90 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 91 92 93def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 94 def _builder(args: t.List, dialect: Dialect) -> E: 95 expression = expr_type( 96 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 97 ) 98 if len(args) > 2 and expr_type is exp.JSONExtract: 99 expression.set("expressions", args[2:]) 100 101 return expression 102 103 
return _builder 104 105 106def build_mod(args: t.List) -> exp.Mod: 107 this = seq_get(args, 0) 108 expression = seq_get(args, 1) 109 110 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 111 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 112 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 113 114 return exp.Mod(this=this, expression=expression) 115 116 117def build_pad(args: t.List, is_left: bool = True): 118 return exp.Pad( 119 this=seq_get(args, 0), 120 expression=seq_get(args, 1), 121 fill_pattern=seq_get(args, 2), 122 is_left=is_left, 123 ) 124 125 126def build_array_constructor( 127 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 128) -> exp.Expression: 129 array_exp = exp_class(expressions=args) 130 131 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 132 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 133 134 return array_exp 135 136 137def build_convert_timezone( 138 args: t.List, default_source_tz: t.Optional[str] = None 139) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 140 if len(args) == 2: 141 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 142 return exp.ConvertTimezone( 143 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 144 ) 145 146 return exp.ConvertTimezone.from_arg_list(args) 147 148 149def build_trim(args: t.List, is_left: bool = True): 150 return exp.Trim( 151 this=seq_get(args, 0), 152 expression=seq_get(args, 1), 153 position="LEADING" if is_left else "TRAILING", 154 ) 155 156 157def build_coalesce( 158 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 159) -> exp.Coalesce: 160 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 161 162 163def build_locate_strposition(args: t.List): 164 return exp.StrPosition( 165 this=seq_get(args, 1), 166 substr=seq_get(args, 0), 167 position=seq_get(args, 2), 168 ) 169 170 171class _Parser(type): 172 def __new__(cls, clsname, bases, attrs): 173 klass = super().__new__(cls, clsname, bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 
193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 
TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOGRAPHYPOINT, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: 
TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 TokenType.SESSION, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *CREATABLES, 573 *SUBQUERY_PREDICATES, 574 *TYPE_TOKENS, 575 *NO_PAREN_FUNCTIONS, 576 } 577 ID_VAR_TOKENS.remove(TokenType.UNION) 578 579 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 580 TokenType.ANTI, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 
TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.UTC_DATE, 646 TokenType.UTC_TIME, 647 TokenType.WINDOW, 648 TokenType.XOR, 649 *TYPE_TOKENS, 650 *SUBQUERY_PREDICATES, 651 } 652 653 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.AND: exp.And, 655 } 656 657 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.COLON_EQ: exp.PropertyEQ, 659 } 660 661 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.OR: exp.Or, 663 } 664 665 EQUALITY = { 666 TokenType.EQ: exp.EQ, 667 TokenType.NEQ: exp.NEQ, 668 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 669 } 670 671 COMPARISON = { 672 TokenType.GT: exp.GT, 673 TokenType.GTE: exp.GTE, 674 TokenType.LT: exp.LT, 675 TokenType.LTE: exp.LTE, 676 } 677 678 BITWISE = { 679 TokenType.AMP: exp.BitwiseAnd, 680 TokenType.CARET: exp.BitwiseXor, 681 TokenType.PIPE: exp.BitwiseOr, 682 } 683 684 TERM = { 685 TokenType.DASH: exp.Sub, 686 TokenType.PLUS: exp.Add, 687 TokenType.MOD: exp.Mod, 688 TokenType.COLLATE: exp.Collate, 689 } 690 691 FACTOR = { 692 TokenType.DIV: exp.IntDiv, 693 TokenType.LR_ARROW: exp.Distance, 694 TokenType.SLASH: exp.Div, 695 TokenType.STAR: exp.Mul, 696 } 697 698 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 699 700 TIMES = { 701 TokenType.TIME, 702 TokenType.TIMETZ, 703 } 704 705 TIMESTAMPS = { 706 TokenType.TIMESTAMP, 707 TokenType.TIMESTAMPNTZ, 708 TokenType.TIMESTAMPTZ, 709 TokenType.TIMESTAMPLTZ, 710 *TIMES, 711 } 712 713 SET_OPERATIONS = { 714 TokenType.UNION, 715 TokenType.INTERSECT, 716 TokenType.EXCEPT, 717 } 718 719 JOIN_METHODS = { 720 TokenType.ASOF, 721 TokenType.NATURAL, 722 TokenType.POSITIONAL, 723 } 724 725 JOIN_SIDES = { 726 TokenType.LEFT, 727 TokenType.RIGHT, 728 TokenType.FULL, 729 } 730 731 JOIN_KINDS = { 732 TokenType.ANTI, 733 TokenType.CROSS, 734 TokenType.INNER, 735 TokenType.OUTER, 736 TokenType.SEMI, 737 TokenType.STRAIGHT_JOIN, 738 } 739 740 JOIN_HINTS: t.Set[str] = set() 741 742 LAMBDAS = { 743 TokenType.ARROW: lambda self, expressions: self.expression( 744 exp.Lambda, 745 this=self._replace_lambda( 746 self._parse_assignment(), 747 expressions, 748 ), 749 expressions=expressions, 750 ), 751 TokenType.FARROW: lambda self, expressions: self.expression( 752 exp.Kwarg, 753 this=exp.var(expressions[0].name), 754 
expression=self._parse_assignment(), 755 ), 756 } 757 758 COLUMN_OPERATORS = { 759 TokenType.DOT: None, 760 TokenType.DOTCOLON: lambda self, this, to: self.expression( 761 exp.JSONCast, 762 this=this, 763 to=to, 764 ), 765 TokenType.DCOLON: lambda self, this, to: self.build_cast( 766 strict=self.STRICT_CAST, this=this, to=to 767 ), 768 TokenType.ARROW: lambda self, this, path: self.expression( 769 exp.JSONExtract, 770 this=this, 771 expression=self.dialect.to_json_path(path), 772 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 773 ), 774 TokenType.DARROW: lambda self, this, path: self.expression( 775 exp.JSONExtractScalar, 776 this=this, 777 expression=self.dialect.to_json_path(path), 778 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 779 ), 780 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtract, 782 this=this, 783 expression=path, 784 ), 785 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 786 exp.JSONBExtractScalar, 787 this=this, 788 expression=path, 789 ), 790 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 791 exp.JSONBContains, 792 this=this, 793 expression=key, 794 ), 795 } 796 797 CAST_COLUMN_OPERATORS = { 798 TokenType.DOTCOLON, 799 TokenType.DCOLON, 800 } 801 802 EXPRESSION_PARSERS = { 803 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 804 exp.Column: lambda self: self._parse_column(), 805 exp.Condition: lambda self: self._parse_assignment(), 806 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 807 exp.Expression: lambda self: self._parse_expression(), 808 exp.From: lambda self: self._parse_from(joins=True), 809 exp.Group: lambda self: self._parse_group(), 810 exp.Having: lambda self: self._parse_having(), 811 exp.Hint: lambda self: self._parse_hint_body(), 812 exp.Identifier: lambda self: self._parse_id_var(), 813 exp.Join: lambda self: self._parse_join(), 814 exp.Lambda: lambda self: self._parse_lambda(), 815 exp.Lateral: lambda self: self._parse_lateral(), 816 exp.Limit: lambda self: self._parse_limit(), 817 exp.Offset: lambda self: self._parse_offset(), 818 exp.Order: lambda self: self._parse_order(), 819 exp.Ordered: lambda self: self._parse_ordered(), 820 exp.Properties: lambda self: self._parse_properties(), 821 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 822 exp.Qualify: lambda self: self._parse_qualify(), 823 exp.Returning: lambda self: self._parse_returning(), 824 exp.Select: lambda self: self._parse_select(), 825 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 826 exp.Table: lambda self: self._parse_table_parts(), 827 exp.TableAlias: lambda self: self._parse_table_alias(), 828 exp.Tuple: lambda self: self._parse_value(values=False), 829 exp.Whens: lambda self: self._parse_when_matched(), 830 exp.Where: lambda self: self._parse_where(), 831 exp.Window: lambda self: self._parse_named_window(), 832 exp.With: lambda self: self._parse_with(), 833 "JOIN_TYPE": lambda self: self._parse_join_parts(), 834 } 835 836 STATEMENT_PARSERS = { 837 TokenType.ALTER: lambda self: self._parse_alter(), 838 TokenType.ANALYZE: lambda self: self._parse_analyze(), 839 TokenType.BEGIN: lambda self: self._parse_transaction(), 840 TokenType.CACHE: lambda self: self._parse_cache(), 841 TokenType.COMMENT: lambda self: self._parse_comment(), 842 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 843 TokenType.COPY: lambda self: self._parse_copy(), 844 TokenType.CREATE: lambda self: 
self._parse_create(), 845 TokenType.DELETE: lambda self: self._parse_delete(), 846 TokenType.DESC: lambda self: self._parse_describe(), 847 TokenType.DESCRIBE: lambda self: self._parse_describe(), 848 TokenType.DROP: lambda self: self._parse_drop(), 849 TokenType.GRANT: lambda self: self._parse_grant(), 850 TokenType.REVOKE: lambda self: self._parse_revoke(), 851 TokenType.INSERT: lambda self: self._parse_insert(), 852 TokenType.KILL: lambda self: self._parse_kill(), 853 TokenType.LOAD: lambda self: self._parse_load(), 854 TokenType.MERGE: lambda self: self._parse_merge(), 855 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 856 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 857 TokenType.REFRESH: lambda self: self._parse_refresh(), 858 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 859 TokenType.SET: lambda self: self._parse_set(), 860 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 861 TokenType.UNCACHE: lambda self: self._parse_uncache(), 862 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 863 TokenType.UPDATE: lambda self: self._parse_update(), 864 TokenType.USE: lambda self: self._parse_use(), 865 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 866 } 867 868 UNARY_PARSERS = { 869 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 870 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 871 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 872 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 873 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 874 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 875 } 876 877 STRING_PARSERS = { 878 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 879 exp.RawString, this=token.text 880 ), 881 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 882 exp.National, this=token.text 883 ), 884 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 885 TokenType.STRING: lambda self, token: self.expression( 886 exp.Literal, this=token.text, is_string=True 887 ), 888 TokenType.UNICODE_STRING: lambda self, token: self.expression( 889 exp.UnicodeString, 890 this=token.text, 891 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 892 ), 893 } 894 895 NUMERIC_PARSERS = { 896 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 897 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 898 TokenType.HEX_STRING: lambda self, token: self.expression( 899 exp.HexString, 900 this=token.text, 901 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 902 ), 903 TokenType.NUMBER: lambda self, token: self.expression( 904 exp.Literal, this=token.text, is_string=False 905 ), 906 } 907 908 PRIMARY_PARSERS = { 909 **STRING_PARSERS, 910 **NUMERIC_PARSERS, 911 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 912 TokenType.NULL: lambda self, _: self.expression(exp.Null), 913 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 914 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 915 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 916 TokenType.STAR: lambda self, _: self._parse_star_ops(), 917 } 918 
919 PLACEHOLDER_PARSERS = { 920 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 921 TokenType.PARAMETER: lambda self: self._parse_parameter(), 922 TokenType.COLON: lambda self: ( 923 self.expression(exp.Placeholder, this=self._prev.text) 924 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 925 else None 926 ), 927 } 928 929 RANGE_PARSERS = { 930 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 931 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 932 TokenType.GLOB: binary_range_parser(exp.Glob), 933 TokenType.ILIKE: binary_range_parser(exp.ILike), 934 TokenType.IN: lambda self, this: self._parse_in(this), 935 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 936 TokenType.IS: lambda self, this: self._parse_is(this), 937 TokenType.LIKE: binary_range_parser(exp.Like), 938 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 939 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 940 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 941 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 942 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 943 } 944 945 PIPE_SYNTAX_TRANSFORM_PARSERS = { 946 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 947 "AS": lambda self, query: self._build_pipe_cte( 948 query, [exp.Star()], self._parse_table_alias() 949 ), 950 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 951 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 952 "ORDER BY": lambda self, query: query.order_by( 953 self._parse_order(), append=False, copy=False 954 ), 955 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 956 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 957 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 958 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 959 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 960 } 961 962 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 963 "ALLOWED_VALUES": lambda self: self.expression( 964 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 965 ), 966 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 967 "AUTO": lambda self: self._parse_auto_property(), 968 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 969 "BACKUP": lambda self: self.expression( 970 exp.BackupProperty, this=self._parse_var(any_token=True) 971 ), 972 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 973 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 974 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 975 "CHECKSUM": lambda self: self._parse_checksum(), 976 "CLUSTER BY": lambda self: self._parse_cluster(), 977 "CLUSTERED": lambda self: self._parse_clustered_by(), 978 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 979 exp.CollateProperty, **kwargs 980 ), 981 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 982 "CONTAINS": lambda self: self._parse_contains_property(), 983 "COPY": lambda self: self._parse_copy_property(), 984 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 985 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 986 "DEFINER": lambda self: self._parse_definer(), 987 "DETERMINISTIC": lambda self: self.expression( 988 
exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 989 ), 990 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 991 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 992 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 993 "DISTKEY": lambda self: self._parse_distkey(), 994 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 995 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 996 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 997 "ENVIRONMENT": lambda self: self.expression( 998 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 999 ), 1000 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1001 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1002 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1003 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1004 "FREESPACE": lambda self: self._parse_freespace(), 1005 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1006 "HEAP": lambda self: self.expression(exp.HeapProperty), 1007 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1008 "IMMUTABLE": lambda self: self.expression( 1009 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1010 ), 1011 "INHERITS": lambda self: self.expression( 1012 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1013 ), 1014 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1015 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1016 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1017 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1018 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1019 "LIKE": lambda self: self._parse_create_like(), 1020 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1021 "LOCK": lambda self: self._parse_locking(), 1022 "LOCKING": lambda self: self._parse_locking(), 1023 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1024 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1025 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1026 "MODIFIES": lambda self: self._parse_modifies_property(), 1027 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1028 "NO": lambda self: self._parse_no_property(), 1029 "ON": lambda self: self._parse_on_property(), 1030 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1031 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1032 "PARTITION": lambda self: self._parse_partitioned_of(), 1033 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1034 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1035 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1036 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1037 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1038 "READS": lambda self: self._parse_reads_property(), 1039 "REMOTE": lambda self: self._parse_remote_with_connection(), 1040 "RETURNS": lambda self: self._parse_returns(), 1041 "STRICT": lambda self: self.expression(exp.StrictProperty), 1042 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1043 "ROW": lambda self: self._parse_row(), 
1044 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1045 "SAMPLE": lambda self: self.expression( 1046 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1047 ), 1048 "SECURE": lambda self: self.expression(exp.SecureProperty), 1049 "SECURITY": lambda self: self._parse_security(), 1050 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1051 "SETTINGS": lambda self: self._parse_settings_property(), 1052 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1053 "SORTKEY": lambda self: self._parse_sortkey(), 1054 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1055 "STABLE": lambda self: self.expression( 1056 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1057 ), 1058 "STORED": lambda self: self._parse_stored(), 1059 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1060 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1061 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1062 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1063 "TO": lambda self: self._parse_to_table(), 1064 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1065 "TRANSFORM": lambda self: self.expression( 1066 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1067 ), 1068 "TTL": lambda self: self._parse_ttl(), 1069 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1070 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1071 "VOLATILE": lambda self: self._parse_volatile_property(), 1072 "WITH": lambda self: self._parse_with_property(), 1073 } 1074 1075 CONSTRAINT_PARSERS = { 1076 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1077 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1078 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1079 "CHARACTER SET": lambda self: self.expression( 1080 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1081 ), 1082 "CHECK": lambda self: self.expression( 1083 exp.CheckColumnConstraint, 1084 this=self._parse_wrapped(self._parse_assignment), 1085 enforced=self._match_text_seq("ENFORCED"), 1086 ), 1087 "COLLATE": lambda self: self.expression( 1088 exp.CollateColumnConstraint, 1089 this=self._parse_identifier() or self._parse_column(), 1090 ), 1091 "COMMENT": lambda self: self.expression( 1092 exp.CommentColumnConstraint, this=self._parse_string() 1093 ), 1094 "COMPRESS": lambda self: self._parse_compress(), 1095 "CLUSTERED": lambda self: self.expression( 1096 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1097 ), 1098 "NONCLUSTERED": lambda self: self.expression( 1099 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1100 ), 1101 "DEFAULT": lambda self: self.expression( 1102 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1103 ), 1104 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1105 "EPHEMERAL": lambda self: self.expression( 1106 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1107 ), 1108 "EXCLUDE": lambda self: self.expression( 1109 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1110 ), 1111 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1112 "FORMAT": lambda self: self.expression( 1113 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1114 ), 1115 
"GENERATED": lambda self: self._parse_generated_as_identity(), 1116 "IDENTITY": lambda self: self._parse_auto_increment(), 1117 "INLINE": lambda self: self._parse_inline(), 1118 "LIKE": lambda self: self._parse_create_like(), 1119 "NOT": lambda self: self._parse_not_constraint(), 1120 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1121 "ON": lambda self: ( 1122 self._match(TokenType.UPDATE) 1123 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1124 ) 1125 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1126 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1127 "PERIOD": lambda self: self._parse_period_for_system_time(), 1128 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1129 "REFERENCES": lambda self: self._parse_references(match=False), 1130 "TITLE": lambda self: self.expression( 1131 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1132 ), 1133 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1134 "UNIQUE": lambda self: self._parse_unique(), 1135 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1136 "WATERMARK": lambda self: self.expression( 1137 exp.WatermarkColumnConstraint, 1138 this=self._match(TokenType.FOR) and self._parse_column(), 1139 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1140 ), 1141 "WITH": lambda self: self.expression( 1142 exp.Properties, expressions=self._parse_wrapped_properties() 1143 ), 1144 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1145 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1146 } 1147 1148 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1149 if not self._match(TokenType.L_PAREN, advance=False): 1150 # Partitioning by bucket or truncate follows the syntax: 1151 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1152 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1153 self._retreat(self._index - 1) 1154 return None 1155 1156 klass = ( 1157 exp.PartitionedByBucket 1158 if self._prev.text.upper() == "BUCKET" 1159 else exp.PartitionByTruncate 1160 ) 1161 1162 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1163 this, expression = seq_get(args, 0), seq_get(args, 1) 1164 1165 if isinstance(this, exp.Literal): 1166 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1167 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1168 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1169 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1170 # 1171 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1172 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1173 this, expression = expression, this 1174 1175 return self.expression(klass, this=this, expression=expression) 1176 1177 ALTER_PARSERS = { 1178 "ADD": lambda self: self._parse_alter_table_add(), 1179 "AS": lambda self: self._parse_select(), 1180 "ALTER": lambda self: self._parse_alter_table_alter(), 1181 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1182 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1183 "DROP": lambda self: self._parse_alter_table_drop(), 1184 "RENAME": lambda self: self._parse_alter_table_rename(), 1185 "SET": lambda self: self._parse_alter_table_set(), 1186 "SWAP": lambda self: self.expression( 1187 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1188 ), 1189 } 1190 1191 ALTER_ALTER_PARSERS = { 1192 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1193 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1194 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1195 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1196 } 1197 1198 SCHEMA_UNNAMED_CONSTRAINTS = { 1199 "CHECK", 1200 "EXCLUDE", 1201 "FOREIGN KEY", 1202 "LIKE", 1203 "PERIOD", 1204 "PRIMARY KEY", 1205 "UNIQUE", 1206 "WATERMARK", 1207 "BUCKET", 1208 "TRUNCATE", 1209 } 1210 1211 NO_PAREN_FUNCTION_PARSERS = { 1212 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1213 "CASE": lambda self: self._parse_case(), 1214 "CONNECT_BY_ROOT": lambda self: self.expression( 1215 exp.ConnectByRoot, this=self._parse_column() 1216 ), 1217 "IF": lambda self: self._parse_if(), 1218 } 1219 1220 INVALID_FUNC_NAME_TOKENS = { 1221 TokenType.IDENTIFIER, 1222 TokenType.STRING, 1223 } 1224 1225 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1226 1227 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1228 1229 FUNCTION_PARSERS = { 1230 **{ 1231 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1232 }, 1233 **{ 1234 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1235 }, 1236 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1237 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1238 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1239 "DECODE": lambda self: self._parse_decode(), 1240 "EXTRACT": lambda self: self._parse_extract(), 1241 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1242 "GAP_FILL": lambda self: self._parse_gap_fill(), 1243 "JSON_OBJECT": lambda self: self._parse_json_object(), 1244 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1245 "JSON_TABLE": lambda self: self._parse_json_table(), 1246 "MATCH": lambda self: self._parse_match_against(), 1247 "NORMALIZE": lambda self: self._parse_normalize(), 1248 "OPENJSON": lambda self: self._parse_open_json(), 1249 "OVERLAY": lambda self: self._parse_overlay(), 1250 "POSITION": lambda self: self._parse_position(), 1251 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1252 "STRING_AGG": lambda self: self._parse_string_agg(), 1253 "SUBSTRING": lambda self: self._parse_substring(), 1254 "TRIM": lambda self: self._parse_trim(), 1255 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1256 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1257 "XMLELEMENT": lambda self: self.expression( 1258 exp.XMLElement, 1259 this=self._match_text_seq("NAME") and self._parse_id_var(), 1260 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1261 ), 1262 "XMLTABLE": lambda self: self._parse_xml_table(), 1263 } 1264 1265 QUERY_MODIFIER_PARSERS = { 1266 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1267 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1268 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1269 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1270 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1271 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1272 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1273 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1274 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1275 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1276 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1277 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1278 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1279 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1280 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1281 TokenType.CLUSTER_BY: lambda self: ( 1282 "cluster", 1283 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1284 ), 1285 TokenType.DISTRIBUTE_BY: lambda self: ( 1286 "distribute", 1287 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1288 ), 1289 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1290 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1291 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1292 } 1293 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1294 1295 SET_PARSERS = { 1296 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1297 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1298 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1299 "TRANSACTION": lambda self: self._parse_set_transaction(), 1300 } 1301 1302 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1303 1304 TYPE_LITERAL_PARSERS = { 1305 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1306 } 1307 1308 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1309 1310 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1311 1312 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1313 1314 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1315 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1316 "ISOLATION": ( 1317 ("LEVEL", "REPEATABLE", "READ"), 1318 ("LEVEL", "READ", "COMMITTED"), 1319 ("LEVEL", "READ", "UNCOMITTED"), 1320 ("LEVEL", "SERIALIZABLE"), 1321 ), 1322 "READ": ("WRITE", "ONLY"), 1323 } 1324 1325 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1326 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1327 ) 1328 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1329 1330 CREATE_SEQUENCE: OPTIONS_TYPE = { 1331 "SCALE": ("EXTEND", "NOEXTEND"), 1332 "SHARD": ("EXTEND", "NOEXTEND"), 1333 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1334 **dict.fromkeys( 1335 ( 1336 "SESSION", 1337 "GLOBAL", 1338 "KEEP", 1339 "NOKEEP", 1340 "ORDER", 1341 "NOORDER", 1342 "NOCACHE", 1343 "CYCLE", 1344 "NOCYCLE", 1345 "NOMINVALUE", 1346 "NOMAXVALUE", 1347 "NOSCALE", 1348 "NOSHARD", 1349 ), 1350 tuple(), 1351 ), 1352 } 1353 1354 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1355 1356 USABLES: OPTIONS_TYPE = dict.fromkeys( 1357 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1358 ) 1359 1360 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1361 1362 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1363 "TYPE": ("EVOLUTION",), 1364 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1365 } 1366 1367 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1368 1369 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1370 1371 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1372 "NOT": ("ENFORCED",), 1373 "MATCH": ( 1374 "FULL", 1375 "PARTIAL", 1376 "SIMPLE", 1377 ), 1378 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1379 "USING": ( 1380 "BTREE", 1381 "HASH", 1382 ), 1383 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1384 } 1385 1386 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1387 "NO": ("OTHERS",), 1388 "CURRENT": ("ROW",), 1389 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1390 } 1391 1392 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1393 1394 CLONE_KEYWORDS = {"CLONE", "COPY"} 1395 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1396 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1397 1398 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1399 1400 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1401 1402 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1403 1404 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1405 1406 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1407 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1408 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1409 1410 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1411 1412 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1413 1414 ADD_CONSTRAINT_TOKENS = { 1415 TokenType.CONSTRAINT, 1416 TokenType.FOREIGN_KEY, 1417 TokenType.INDEX, 1418 TokenType.KEY, 1419 TokenType.PRIMARY_KEY, 1420 TokenType.UNIQUE, 1421 } 1422 1423 DISTINCT_TOKENS = {TokenType.DISTINCT} 1424 1425 NULL_TOKENS = {TokenType.NULL} 1426 1427 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1428 1429 
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1430 1431 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1432 1433 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1434 1435 ODBC_DATETIME_LITERALS = { 1436 "d": exp.Date, 1437 "t": exp.Time, 1438 "ts": exp.Timestamp, 1439 } 1440 1441 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1442 1443 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1444 1445 # The style options for the DESCRIBE statement 1446 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1447 1448 # The style options for the ANALYZE statement 1449 ANALYZE_STYLES = { 1450 "BUFFER_USAGE_LIMIT", 1451 "FULL", 1452 "LOCAL", 1453 "NO_WRITE_TO_BINLOG", 1454 "SAMPLE", 1455 "SKIP_LOCKED", 1456 "VERBOSE", 1457 } 1458 1459 ANALYZE_EXPRESSION_PARSERS = { 1460 "ALL": lambda self: self._parse_analyze_columns(), 1461 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1462 "DELETE": lambda self: self._parse_analyze_delete(), 1463 "DROP": lambda self: self._parse_analyze_histogram(), 1464 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1465 "LIST": lambda self: self._parse_analyze_list(), 1466 "PREDICATE": lambda self: self._parse_analyze_columns(), 1467 "UPDATE": lambda self: self._parse_analyze_histogram(), 1468 "VALIDATE": lambda self: self._parse_analyze_validate(), 1469 } 1470 1471 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1472 1473 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1474 1475 OPERATION_MODIFIERS: t.Set[str] = set() 1476 1477 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1478 1479 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1480 1481 STRICT_CAST = True 1482 1483 PREFIXED_PIVOT_COLUMNS = False 1484 IDENTIFY_PIVOT_STRINGS = False 1485 1486 LOG_DEFAULTS_TO_LN = False 1487 1488 # Whether the table sample clause expects CSV syntax 1489 TABLESAMPLE_CSV = False 1490 1491 # The default method used for table sampling 1492 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1493 1494 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1495 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1496 1497 # Whether the TRIM function expects the characters to trim as its first argument 1498 TRIM_PATTERN_FIRST = False 1499 1500 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1501 STRING_ALIASES = False 1502 1503 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1504 MODIFIERS_ATTACHED_TO_SET_OP = True 1505 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1506 1507 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1508 NO_PAREN_IF_COMMANDS = True 1509 1510 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1511 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1512 1513 # Whether the `:` operator is used to extract a value from a VARIANT column 1514 COLON_IS_VARIANT_EXTRACT = False 1515 1516 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1517 # If this is True and '(' is not found, the keyword will be treated as an identifier 1518 VALUES_FOLLOWED_BY_PAREN = True 1519 1520 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1521 SUPPORTS_IMPLICIT_UNNEST = False 1522 1523 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1524 INTERVAL_SPANS = True 1525 1526 # Whether a PARTITION clause can follow a table reference 1527 SUPPORTS_PARTITION_SELECTION = False 1528 1529 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1530 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1531 1532 # Whether the 'AS' keyword is optional in the CTE definition syntax 1533 OPTIONAL_ALIAS_TOKEN_CTE = True 1534 1535 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1536 ALTER_RENAME_REQUIRES_COLUMN = True 1537 1538 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1539 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1540 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1541 # as BigQuery, where all joins have the same precedence. 1542 JOINS_HAVE_EQUAL_PRECEDENCE = False 1543 1544 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1545 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1546 1547 # Whether map literals support arbitrary expressions as keys. 1548 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1549 # When False, keys are typically restricted to identifiers. 1550 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1551 1552 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1553 # is true for Snowflake but not for BigQuery which can also process strings 1554 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1555 1556 __slots__ = ( 1557 "error_level", 1558 "error_message_context", 1559 "max_errors", 1560 "dialect", 1561 "sql", 1562 "errors", 1563 "_tokens", 1564 "_index", 1565 "_curr", 1566 "_next", 1567 "_prev", 1568 "_prev_comments", 1569 "_pipe_cte_counter", 1570 ) 1571 1572 # Autofilled 1573 SHOW_TRIE: t.Dict = {} 1574 SET_TRIE: t.Dict = {} 1575 1576 def __init__( 1577 self, 1578 error_level: t.Optional[ErrorLevel] = None, 1579 error_message_context: int = 100, 1580 max_errors: int = 3, 1581 dialect: DialectType = None, 1582 ): 1583 from sqlglot.dialects import Dialect 1584 1585 self.error_level = error_level or ErrorLevel.IMMEDIATE 1586 self.error_message_context = error_message_context 1587 self.max_errors = max_errors 1588 self.dialect = Dialect.get_or_raise(dialect) 1589 self.reset() 1590 1591 def reset(self): 1592 self.sql = "" 1593 self.errors = [] 1594 self._tokens = [] 1595 self._index = 0 1596 self._curr = None 1597 self._next = None 1598 self._prev = None 1599 self._prev_comments = None 1600 self._pipe_cte_counter = 0 1601 1602 def parse( 1603 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1604 ) -> t.List[t.Optional[exp.Expression]]: 1605 """ 1606 Parses a list of tokens and returns a list of syntax trees, one tree 1607 per parsed SQL statement. 1608 1609 Args: 1610 raw_tokens: The list of tokens. 1611 sql: The original SQL string, used to produce helpful debug messages. 1612 1613 Returns: 1614 The list of the produced syntax trees. 
1615 """ 1616 return self._parse( 1617 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1618 ) 1619 1620 def parse_into( 1621 self, 1622 expression_types: exp.IntoType, 1623 raw_tokens: t.List[Token], 1624 sql: t.Optional[str] = None, 1625 ) -> t.List[t.Optional[exp.Expression]]: 1626 """ 1627 Parses a list of tokens into a given Expression type. If a collection of Expression 1628 types is given instead, this method will try to parse the token list into each one 1629 of them, stopping at the first for which the parsing succeeds. 1630 1631 Args: 1632 expression_types: The expression type(s) to try and parse the token list into. 1633 raw_tokens: The list of tokens. 1634 sql: The original SQL string, used to produce helpful debug messages. 1635 1636 Returns: 1637 The target Expression. 1638 """ 1639 errors = [] 1640 for expression_type in ensure_list(expression_types): 1641 parser = self.EXPRESSION_PARSERS.get(expression_type) 1642 if not parser: 1643 raise TypeError(f"No parser registered for {expression_type}") 1644 1645 try: 1646 return self._parse(parser, raw_tokens, sql) 1647 except ParseError as e: 1648 e.errors[0]["into_expression"] = expression_type 1649 errors.append(e) 1650 1651 raise ParseError( 1652 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1653 errors=merge_errors(errors), 1654 ) from errors[-1] 1655 1656 def _parse( 1657 self, 1658 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1659 raw_tokens: t.List[Token], 1660 sql: t.Optional[str] = None, 1661 ) -> t.List[t.Optional[exp.Expression]]: 1662 self.reset() 1663 self.sql = sql or "" 1664 1665 total = len(raw_tokens) 1666 chunks: t.List[t.List[Token]] = [[]] 1667 1668 for i, token in enumerate(raw_tokens): 1669 if token.token_type == TokenType.SEMICOLON: 1670 if token.comments: 1671 chunks.append([token]) 1672 1673 if i < total - 1: 1674 chunks.append([]) 1675 else: 1676 chunks[-1].append(token) 1677 1678 expressions = [] 1679 1680 for tokens in chunks: 1681 self._index = -1 1682 self._tokens = tokens 1683 self._advance() 1684 1685 expressions.append(parse_method(self)) 1686 1687 if self._index < len(self._tokens): 1688 self.raise_error("Invalid expression / Unexpected token") 1689 1690 self.check_errors() 1691 1692 return expressions 1693 1694 def check_errors(self) -> None: 1695 """Logs or raises any found errors, depending on the chosen error level setting.""" 1696 if self.error_level == ErrorLevel.WARN: 1697 for error in self.errors: 1698 logger.error(str(error)) 1699 elif self.error_level == ErrorLevel.RAISE and self.errors: 1700 raise ParseError( 1701 concat_messages(self.errors, self.max_errors), 1702 errors=merge_errors(self.errors), 1703 ) 1704 1705 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1706 """ 1707 Appends an error in the list of recorded errors or raises it, depending on the chosen 1708 error level setting. 1709 """ 1710 token = token or self._curr or self._prev or Token.string("") 1711 start = token.start 1712 end = token.end + 1 1713 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1714 highlight = self.sql[start:end] 1715 end_context = self.sql[end : end + self.error_message_context] 1716 1717 error = ParseError.new( 1718 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1719 f"  {start_context}\033[4m{highlight}\033[0m{end_context}", 1720 description=message, 1721 line=token.line, 1722 col=token.col, 1723 start_context=start_context, 1724 highlight=highlight, 1725 end_context=end_context, 1726 ) 1727 1728 if self.error_level == ErrorLevel.IMMEDIATE: 1729 raise error 1730 1731 self.errors.append(error) 1732 1733 def expression( 1734 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1735 ) -> E: 1736 """ 1737 Creates a new, validated Expression. 1738 1739 Args: 1740 exp_class: The expression class to instantiate. 1741 comments: An optional list of comments to attach to the expression. 1742 kwargs: The arguments to set for the expression along with their respective values. 1743 1744 Returns: 1745 The target expression. 1746 """ 1747 instance = exp_class(**kwargs) 1748 instance.add_comments(comments) if comments else self._add_comments(instance) 1749 return self.validate_expression(instance) 1750 1751 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1752 if expression and self._prev_comments: 1753 expression.add_comments(self._prev_comments) 1754 self._prev_comments = None 1755 1756 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1757 """ 1758 Validates an Expression, making sure that all its mandatory arguments are set. 1759 1760 Args: 1761 expression: The expression to validate. 1762 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1763 1764 Returns: 1765 The validated expression. 1766 """ 1767 if self.error_level != ErrorLevel.IGNORE: 1768 for error_message in expression.error_messages(args): 1769 self.raise_error(error_message) 1770 1771 return expression 1772 1773 def _find_sql(self, start: Token, end: Token) -> str: 1774 return self.sql[start.start : end.end + 1] 1775 1776 def _is_connected(self) -> bool: 1777 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1778 1779 def _advance(self, times: int = 1) -> None: 1780 self._index += times 1781 self._curr = seq_get(self._tokens, self._index) 1782 self._next = seq_get(self._tokens, self._index + 1) 1783 1784 if self._index > 0: 1785 self._prev = self._tokens[self._index - 1] 1786 self._prev_comments = self._prev.comments 1787 else: 1788 self._prev = None 1789 self._prev_comments = None 1790 1791 def _retreat(self, index: int) -> None: 1792 if index != self._index: 1793 self._advance(index - self._index) 1794 1795 def _warn_unsupported(self) -> None: 1796 if len(self._tokens) <= 1: 1797 return 1798 1799 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1800 # interested in emitting a warning for the one being currently processed. 1801 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1802 1803 logger.warning( 1804 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1805 ) 1806 1807 def _parse_command(self) -> exp.Command: 1808 self._warn_unsupported() 1809 return self.expression( 1810 exp.Command, 1811 comments=self._prev_comments, 1812 this=self._prev.text.upper(), 1813 expression=self._parse_string(), 1814 ) 1815 1816 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1817 """ 1818 Attempts to backtrack if a parse function that contains a try/except internally raises an error.
1819 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1820 solve this by setting & resetting the parser state accordingly. 1821 """ 1822 index = self._index 1823 error_level = self.error_level 1824 1825 self.error_level = ErrorLevel.IMMEDIATE 1826 try: 1827 this = parse_method() 1828 except ParseError: 1829 this = None 1830 finally: 1831 if not this or retreat: 1832 self._retreat(index) 1833 self.error_level = error_level 1834 1835 return this 1836 1837 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1838 start = self._prev 1839 exists = self._parse_exists() if allow_exists else None 1840 1841 self._match(TokenType.ON) 1842 1843 materialized = self._match_text_seq("MATERIALIZED") 1844 kind = self._match_set(self.CREATABLES) and self._prev 1845 if not kind: 1846 return self._parse_as_command(start) 1847 1848 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1849 this = self._parse_user_defined_function(kind=kind.token_type) 1850 elif kind.token_type == TokenType.TABLE: 1851 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1852 elif kind.token_type == TokenType.COLUMN: 1853 this = self._parse_column() 1854 else: 1855 this = self._parse_id_var() 1856 1857 self._match(TokenType.IS) 1858 1859 return self.expression( 1860 exp.Comment, 1861 this=this, 1862 kind=kind.text, 1863 expression=self._parse_string(), 1864 exists=exists, 1865 materialized=materialized, 1866 ) 1867 1868 def _parse_to_table( 1869 self, 1870 ) -> exp.ToTableProperty: 1871 table = self._parse_table_parts(schema=True) 1872 return self.expression(exp.ToTableProperty, this=table) 1873 1874 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1875 def _parse_ttl(self) -> exp.Expression: 1876 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1877 this = self._parse_bitwise() 1878 1879 if self._match_text_seq("DELETE"): 1880 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1881 if self._match_text_seq("RECOMPRESS"): 1882 return self.expression( 1883 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1884 ) 1885 if self._match_text_seq("TO", "DISK"): 1886 return self.expression( 1887 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1888 ) 1889 if self._match_text_seq("TO", "VOLUME"): 1890 return self.expression( 1891 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1892 ) 1893 1894 return this 1895 1896 expressions = self._parse_csv(_parse_ttl_action) 1897 where = self._parse_where() 1898 group = self._parse_group() 1899 1900 aggregates = None 1901 if group and self._match(TokenType.SET): 1902 aggregates = self._parse_csv(self._parse_set_item) 1903 1904 return self.expression( 1905 exp.MergeTreeTTL, 1906 expressions=expressions, 1907 where=where, 1908 group=group, 1909 aggregates=aggregates, 1910 ) 1911 1912 def _parse_statement(self) -> t.Optional[exp.Expression]: 1913 if self._curr is None: 1914 return None 1915 1916 if self._match_set(self.STATEMENT_PARSERS): 1917 comments = self._prev_comments 1918 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1919 stmt.add_comments(comments, prepend=True) 1920 return stmt 1921 1922 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1923 return self._parse_command() 1924 1925 expression = self._parse_expression() 1926 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1927 return
self._parse_query_modifiers(expression) 1928 1929 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1930 start = self._prev 1931 temporary = self._match(TokenType.TEMPORARY) 1932 materialized = self._match_text_seq("MATERIALIZED") 1933 1934 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1935 if not kind: 1936 return self._parse_as_command(start) 1937 1938 concurrently = self._match_text_seq("CONCURRENTLY") 1939 if_exists = exists or self._parse_exists() 1940 1941 if kind == "COLUMN": 1942 this = self._parse_column() 1943 else: 1944 this = self._parse_table_parts( 1945 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1946 ) 1947 1948 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1949 1950 if self._match(TokenType.L_PAREN, advance=False): 1951 expressions = self._parse_wrapped_csv(self._parse_types) 1952 else: 1953 expressions = None 1954 1955 return self.expression( 1956 exp.Drop, 1957 exists=if_exists, 1958 this=this, 1959 expressions=expressions, 1960 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1961 temporary=temporary, 1962 materialized=materialized, 1963 cascade=self._match_text_seq("CASCADE"), 1964 constraints=self._match_text_seq("CONSTRAINTS"), 1965 purge=self._match_text_seq("PURGE"), 1966 cluster=cluster, 1967 concurrently=concurrently, 1968 ) 1969 1970 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1971 return ( 1972 self._match_text_seq("IF") 1973 and (not not_ or self._match(TokenType.NOT)) 1974 and self._match(TokenType.EXISTS) 1975 ) 1976 1977 def _parse_create(self) -> exp.Create | exp.Command: 1978 # Note: this can't be None because we've matched a statement parser 1979 start = self._prev 1980 1981 replace = ( 1982 start.token_type == TokenType.REPLACE 1983 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1984 or self._match_pair(TokenType.OR, TokenType.ALTER) 1985 ) 1986 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1987 1988 unique = self._match(TokenType.UNIQUE) 1989 1990 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1991 clustered = True 1992 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1993 "COLUMNSTORE" 1994 ): 1995 clustered = False 1996 else: 1997 clustered = None 1998 1999 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2000 self._advance() 2001 2002 properties = None 2003 create_token = self._match_set(self.CREATABLES) and self._prev 2004 2005 if not create_token: 2006 # exp.Properties.Location.POST_CREATE 2007 properties = self._parse_properties() 2008 create_token = self._match_set(self.CREATABLES) and self._prev 2009 2010 if not properties or not create_token: 2011 return self._parse_as_command(start) 2012 2013 concurrently = self._match_text_seq("CONCURRENTLY") 2014 exists = self._parse_exists(not_=True) 2015 this = None 2016 expression: t.Optional[exp.Expression] = None 2017 indexes = None 2018 no_schema_binding = None 2019 begin = None 2020 end = None 2021 clone = None 2022 2023 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2024 nonlocal properties 2025 if properties and temp_props: 2026 properties.expressions.extend(temp_props.expressions) 2027 elif temp_props: 2028 properties = temp_props 2029 2030 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2031 this = self._parse_user_defined_function(kind=create_token.token_type) 2032 2033 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2034 extend_props(self._parse_properties()) 2035 2036 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2037 extend_props(self._parse_properties()) 2038 2039 if not expression: 2040 if self._match(TokenType.COMMAND): 2041 expression = self._parse_as_command(self._prev) 2042 else: 2043 begin = self._match(TokenType.BEGIN) 2044 return_ = self._match_text_seq("RETURN") 2045 2046 if self._match(TokenType.STRING, advance=False): 2047 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2048 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2049 expression = self._parse_string() 2050 extend_props(self._parse_properties()) 2051 else: 2052 expression = self._parse_user_defined_function_expression() 2053 2054 end = self._match_text_seq("END") 2055 2056 if return_: 2057 expression = self.expression(exp.Return, this=expression) 2058 elif create_token.token_type == TokenType.INDEX: 2059 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2060 if not self._match(TokenType.ON): 2061 index = self._parse_id_var() 2062 anonymous = False 2063 else: 2064 index = None 2065 anonymous = True 2066 2067 this = self._parse_index(index=index, anonymous=anonymous) 2068 elif create_token.token_type in self.DB_CREATABLES: 2069 table_parts = self._parse_table_parts( 2070 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2071 ) 2072 2073 # exp.Properties.Location.POST_NAME 2074 self._match(TokenType.COMMA) 2075 extend_props(self._parse_properties(before=True)) 2076 2077 this = self._parse_schema(this=table_parts) 2078 2079 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2080 extend_props(self._parse_properties()) 2081 2082 has_alias = self._match(TokenType.ALIAS) 2083 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2084 # exp.Properties.Location.POST_ALIAS 2085 extend_props(self._parse_properties()) 2086 2087 if create_token.token_type == TokenType.SEQUENCE: 2088 expression = self._parse_types() 2089 props = self._parse_properties() 2090 if props: 2091 sequence_props = exp.SequenceProperties() 2092 options = [] 2093 for prop in props: 2094 if isinstance(prop, exp.SequenceProperties): 2095 for arg, value in prop.args.items(): 2096 if arg == "options": 2097 options.extend(value) 2098 else: 2099 sequence_props.set(arg, value) 2100 prop.pop() 2101 2102 if options: 2103 sequence_props.set("options", options) 2104 2105 props.append("expressions", sequence_props) 2106 extend_props(props) 2107 else: 2108 expression = self._parse_ddl_select() 2109 2110 # Some dialects also support using a table as an alias instead of a SELECT. 2111 # Here we fallback to this as an alternative. 
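# Illustrative aside: a sketch of the exp.Create tree this method ultimately builds, using the
# public parse_one helper rather than calling the parser internals directly. The exact shape of
# `this` and of any attached properties varies by dialect and statement.
from sqlglot import exp, parse_one

create = parse_one("CREATE TABLE foo AS SELECT * FROM bar")
assert isinstance(create, exp.Create)
assert create.args["kind"] == "TABLE"
assert isinstance(create.expression, exp.Select)  # the CTAS query parsed by _parse_ddl_select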
2112 if not expression and has_alias: 2113 expression = self._try_parse(self._parse_table_parts) 2114 2115 if create_token.token_type == TokenType.TABLE: 2116 # exp.Properties.Location.POST_EXPRESSION 2117 extend_props(self._parse_properties()) 2118 2119 indexes = [] 2120 while True: 2121 index = self._parse_index() 2122 2123 # exp.Properties.Location.POST_INDEX 2124 extend_props(self._parse_properties()) 2125 if not index: 2126 break 2127 else: 2128 self._match(TokenType.COMMA) 2129 indexes.append(index) 2130 elif create_token.token_type == TokenType.VIEW: 2131 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2132 no_schema_binding = True 2133 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2134 extend_props(self._parse_properties()) 2135 2136 shallow = self._match_text_seq("SHALLOW") 2137 2138 if self._match_texts(self.CLONE_KEYWORDS): 2139 copy = self._prev.text.lower() == "copy" 2140 clone = self.expression( 2141 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2142 ) 2143 2144 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2145 return self._parse_as_command(start) 2146 2147 create_kind_text = create_token.text.upper() 2148 return self.expression( 2149 exp.Create, 2150 this=this, 2151 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2152 replace=replace, 2153 refresh=refresh, 2154 unique=unique, 2155 expression=expression, 2156 exists=exists, 2157 properties=properties, 2158 indexes=indexes, 2159 no_schema_binding=no_schema_binding, 2160 begin=begin, 2161 end=end, 2162 clone=clone, 2163 concurrently=concurrently, 2164 clustered=clustered, 2165 ) 2166 2167 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2168 seq = exp.SequenceProperties() 2169 2170 options = [] 2171 index = self._index 2172 2173 while self._curr: 2174 self._match(TokenType.COMMA) 2175 if self._match_text_seq("INCREMENT"): 2176 self._match_text_seq("BY") 2177 self._match_text_seq("=") 2178 seq.set("increment", self._parse_term()) 2179 elif self._match_text_seq("MINVALUE"): 2180 seq.set("minvalue", self._parse_term()) 2181 elif self._match_text_seq("MAXVALUE"): 2182 seq.set("maxvalue", self._parse_term()) 2183 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2184 self._match_text_seq("=") 2185 seq.set("start", self._parse_term()) 2186 elif self._match_text_seq("CACHE"): 2187 # T-SQL allows empty CACHE which is initialized dynamically 2188 seq.set("cache", self._parse_number() or True) 2189 elif self._match_text_seq("OWNED", "BY"): 2190 # "OWNED BY NONE" is the default 2191 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2192 else: 2193 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2194 if opt: 2195 options.append(opt) 2196 else: 2197 break 2198 2199 seq.set("options", options if options else None) 2200 return None if self._index == index else seq 2201 2202 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2203 # only used for teradata currently 2204 self._match(TokenType.COMMA) 2205 2206 kwargs = { 2207 "no": self._match_text_seq("NO"), 2208 "dual": self._match_text_seq("DUAL"), 2209 "before": self._match_text_seq("BEFORE"), 2210 "default": self._match_text_seq("DEFAULT"), 2211 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2212 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2213 "after": self._match_text_seq("AFTER"), 2214 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2215 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2216 } 2217 2218 if self._match_texts(self.PROPERTY_PARSERS): 2219 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2220 try: 2221 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2222 except TypeError: 2223 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2224 2225 return None 2226 2227 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2228 return self._parse_wrapped_csv(self._parse_property) 2229 2230 def _parse_property(self) -> t.Optional[exp.Expression]: 2231 if self._match_texts(self.PROPERTY_PARSERS): 2232 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2233 2234 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2235 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2236 2237 if self._match_text_seq("COMPOUND", "SORTKEY"): 2238 return self._parse_sortkey(compound=True) 2239 2240 if self._match_text_seq("SQL", "SECURITY"): 2241 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2242 2243 index = self._index 2244 2245 seq_props = self._parse_sequence_properties() 2246 if seq_props: 2247 return seq_props 2248 2249 self._retreat(index) 2250 key = self._parse_column() 2251 2252 if not self._match(TokenType.EQ): 2253 self._retreat(index) 2254 return None 2255 2256 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2257 if isinstance(key, exp.Column): 2258 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2259 2260 value = self._parse_bitwise() or self._parse_var(any_token=True) 2261 2262 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2263 if isinstance(value, exp.Column): 2264 value = exp.var(value.name) 2265 2266 return self.expression(exp.Property, this=key, value=value) 2267 2268 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2269 if self._match_text_seq("BY"): 2270 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2271 2272 self._match(TokenType.ALIAS) 2273 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2274 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2275 2276 return self.expression( 2277 exp.FileFormatProperty, 2278 this=( 2279 self.expression( 2280 exp.InputOutputFormat, 2281 input_format=input_format, 2282 output_format=output_format, 2283 ) 2284 if input_format or output_format 2285 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2286 ), 2287 hive_format=True, 2288 ) 2289 2290 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2291 field = self._parse_field() 2292 if isinstance(field, exp.Identifier) and not field.quoted: 2293 field = exp.var(field) 2294 2295 return field 2296 2297 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2298 self._match(TokenType.EQ) 2299 self._match(TokenType.ALIAS) 2300 2301 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2302 2303 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2304 properties = [] 2305 while True: 2306 if before: 2307 prop = self._parse_property_before() 2308 else: 2309 prop = self._parse_property() 2310 if not prop: 2311 break 2312 for p in ensure_list(prop): 2313 properties.append(p) 
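# Illustrative aside: what the property helpers above typically produce. A sketch using the
# public parse_one helper and the MySQL dialect; the exact exp.*Property nodes that come back
# depend on the dialect and the sqlglot version.
from sqlglot import parse_one

create = parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", read="mysql")
props = create.args.get("properties")  # an exp.Properties node, or None
if props:
    for prop in props.expressions:
        # e.g. an engine property and a character set property
        print(type(prop).__name__, "->", prop.sql(dialect="mysql"))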
2314 2315 if properties: 2316 return self.expression(exp.Properties, expressions=properties) 2317 2318 return None 2319 2320 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2321 return self.expression( 2322 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2323 ) 2324 2325 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2326 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2327 security_specifier = self._prev.text.upper() 2328 return self.expression(exp.SecurityProperty, this=security_specifier) 2329 return None 2330 2331 def _parse_settings_property(self) -> exp.SettingsProperty: 2332 return self.expression( 2333 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2334 ) 2335 2336 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2337 if self._index >= 2: 2338 pre_volatile_token = self._tokens[self._index - 2] 2339 else: 2340 pre_volatile_token = None 2341 2342 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2343 return exp.VolatileProperty() 2344 2345 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2346 2347 def _parse_retention_period(self) -> exp.Var: 2348 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2349 number = self._parse_number() 2350 number_str = f"{number} " if number else "" 2351 unit = self._parse_var(any_token=True) 2352 return exp.var(f"{number_str}{unit}") 2353 2354 def _parse_system_versioning_property( 2355 self, with_: bool = False 2356 ) -> exp.WithSystemVersioningProperty: 2357 self._match(TokenType.EQ) 2358 prop = self.expression( 2359 exp.WithSystemVersioningProperty, 2360 **{ # type: ignore 2361 "on": True, 2362 "with": with_, 2363 }, 2364 ) 2365 2366 if self._match_text_seq("OFF"): 2367 prop.set("on", False) 2368 return prop 2369 2370 self._match(TokenType.ON) 2371 if self._match(TokenType.L_PAREN): 2372 while self._curr and not self._match(TokenType.R_PAREN): 2373 if self._match_text_seq("HISTORY_TABLE", "="): 2374 prop.set("this", self._parse_table_parts()) 2375 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2376 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2377 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2378 prop.set("retention_period", self._parse_retention_period()) 2379 2380 self._match(TokenType.COMMA) 2381 2382 return prop 2383 2384 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2385 self._match(TokenType.EQ) 2386 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2387 prop = self.expression(exp.DataDeletionProperty, on=on) 2388 2389 if self._match(TokenType.L_PAREN): 2390 while self._curr and not self._match(TokenType.R_PAREN): 2391 if self._match_text_seq("FILTER_COLUMN", "="): 2392 prop.set("filter_column", self._parse_column()) 2393 elif self._match_text_seq("RETENTION_PERIOD", "="): 2394 prop.set("retention_period", self._parse_retention_period()) 2395 2396 self._match(TokenType.COMMA) 2397 2398 return prop 2399 2400 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2401 kind = "HASH" 2402 expressions: t.Optional[t.List[exp.Expression]] = None 2403 if self._match_text_seq("BY", "HASH"): 2404 expressions = self._parse_wrapped_csv(self._parse_id_var) 2405 elif self._match_text_seq("BY", "RANDOM"): 2406 kind = "RANDOM" 2407 2408 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2409 
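# Illustrative aside for the DISTRIBUTED BY parsing below: this clause appears in Doris/StarRocks
# DDL. A sketch that assumes a sqlglot build shipping the "doris" dialect; the clause is expected
# to surface as an exp.DistributedByProperty among the CREATE statement's properties.
from sqlglot import parse_one

ddl = "CREATE TABLE t (a INT) DISTRIBUTED BY HASH(a) BUCKETS 10"
create = parse_one(ddl, read="doris")
props = create.args.get("properties")
print([type(p).__name__ for p in (props.expressions if props else [])])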
buckets: t.Optional[exp.Expression] = None 2410 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2411 buckets = self._parse_number() 2412 2413 return self.expression( 2414 exp.DistributedByProperty, 2415 expressions=expressions, 2416 kind=kind, 2417 buckets=buckets, 2418 order=self._parse_order(), 2419 ) 2420 2421 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2422 self._match_text_seq("KEY") 2423 expressions = self._parse_wrapped_id_vars() 2424 return self.expression(expr_type, expressions=expressions) 2425 2426 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2427 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2428 prop = self._parse_system_versioning_property(with_=True) 2429 self._match_r_paren() 2430 return prop 2431 2432 if self._match(TokenType.L_PAREN, advance=False): 2433 return self._parse_wrapped_properties() 2434 2435 if self._match_text_seq("JOURNAL"): 2436 return self._parse_withjournaltable() 2437 2438 if self._match_texts(self.VIEW_ATTRIBUTES): 2439 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2440 2441 if self._match_text_seq("DATA"): 2442 return self._parse_withdata(no=False) 2443 elif self._match_text_seq("NO", "DATA"): 2444 return self._parse_withdata(no=True) 2445 2446 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2447 return self._parse_serde_properties(with_=True) 2448 2449 if self._match(TokenType.SCHEMA): 2450 return self.expression( 2451 exp.WithSchemaBindingProperty, 2452 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2453 ) 2454 2455 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2456 return self.expression( 2457 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2458 ) 2459 2460 if not self._next: 2461 return None 2462 2463 return self._parse_withisolatedloading() 2464 2465 def _parse_procedure_option(self) -> exp.Expression | None: 2466 if self._match_text_seq("EXECUTE", "AS"): 2467 return self.expression( 2468 exp.ExecuteAsProperty, 2469 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2470 or self._parse_string(), 2471 ) 2472 2473 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2474 2475 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2476 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2477 self._match(TokenType.EQ) 2478 2479 user = self._parse_id_var() 2480 self._match(TokenType.PARAMETER) 2481 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2482 2483 if not user or not host: 2484 return None 2485 2486 return exp.DefinerProperty(this=f"{user}@{host}") 2487 2488 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2489 self._match(TokenType.TABLE) 2490 self._match(TokenType.EQ) 2491 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2492 2493 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2494 return self.expression(exp.LogProperty, no=no) 2495 2496 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2497 return self.expression(exp.JournalProperty, **kwargs) 2498 2499 def _parse_checksum(self) -> exp.ChecksumProperty: 2500 self._match(TokenType.EQ) 2501 2502 on = None 2503 if self._match(TokenType.ON): 2504 on = True 2505 elif self._match_text_seq("OFF"): 2506 on = False 2507 2508 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2509 2510 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2511 return self.expression( 2512 exp.Cluster, 2513 expressions=( 2514 self._parse_wrapped_csv(self._parse_ordered) 2515 if wrapped 2516 else self._parse_csv(self._parse_ordered) 2517 ), 2518 ) 2519 2520 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2521 self._match_text_seq("BY") 2522 2523 self._match_l_paren() 2524 expressions = self._parse_csv(self._parse_column) 2525 self._match_r_paren() 2526 2527 if self._match_text_seq("SORTED", "BY"): 2528 self._match_l_paren() 2529 sorted_by = self._parse_csv(self._parse_ordered) 2530 self._match_r_paren() 2531 else: 2532 sorted_by = None 2533 2534 self._match(TokenType.INTO) 2535 buckets = self._parse_number() 2536 self._match_text_seq("BUCKETS") 2537 2538 return self.expression( 2539 exp.ClusteredByProperty, 2540 expressions=expressions, 2541 sorted_by=sorted_by, 2542 buckets=buckets, 2543 ) 2544 2545 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2546 if not self._match_text_seq("GRANTS"): 2547 self._retreat(self._index - 1) 2548 return None 2549 2550 return self.expression(exp.CopyGrantsProperty) 2551 2552 def _parse_freespace(self) -> exp.FreespaceProperty: 2553 self._match(TokenType.EQ) 2554 return self.expression( 2555 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2556 ) 2557 2558 def _parse_mergeblockratio( 2559 self, no: bool = False, default: bool = False 2560 ) -> exp.MergeBlockRatioProperty: 2561 if self._match(TokenType.EQ): 2562 return self.expression( 2563 exp.MergeBlockRatioProperty, 2564 this=self._parse_number(), 2565 percent=self._match(TokenType.PERCENT), 2566 ) 2567 2568 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2569 2570 def _parse_datablocksize( 2571 self, 2572 default: t.Optional[bool] = None, 2573 minimum: t.Optional[bool] = None, 2574 maximum: t.Optional[bool] = None, 2575 ) -> exp.DataBlocksizeProperty: 2576 self._match(TokenType.EQ) 2577 size = self._parse_number() 2578 2579 units = None 2580 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2581 units = self._prev.text 2582 2583 return self.expression( 2584 exp.DataBlocksizeProperty, 2585 size=size, 2586 units=units, 2587 default=default, 2588 minimum=minimum, 2589 maximum=maximum, 2590 ) 2591 2592 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2593 self._match(TokenType.EQ) 2594 always = self._match_text_seq("ALWAYS") 2595 manual = self._match_text_seq("MANUAL") 2596 never = self._match_text_seq("NEVER") 2597 default = self._match_text_seq("DEFAULT") 2598 2599 autotemp = None 2600 if self._match_text_seq("AUTOTEMP"): 2601 autotemp = self._parse_schema() 2602 2603 return self.expression( 2604 exp.BlockCompressionProperty, 2605 always=always, 2606 manual=manual, 2607 never=never, 2608 default=default, 2609 autotemp=autotemp, 2610 ) 2611 2612 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2613 index = self._index 2614 no = self._match_text_seq("NO") 2615 concurrent = self._match_text_seq("CONCURRENT") 2616 2617 if not self._match_text_seq("ISOLATED", "LOADING"): 2618 self._retreat(index) 2619 return None 2620 2621 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2622 return self.expression( 2623 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2624 ) 2625 2626 def _parse_locking(self) -> exp.LockingProperty: 2627 if self._match(TokenType.TABLE): 2628 kind = "TABLE" 2629 elif 
self._match(TokenType.VIEW): 2630 kind = "VIEW" 2631 elif self._match(TokenType.ROW): 2632 kind = "ROW" 2633 elif self._match_text_seq("DATABASE"): 2634 kind = "DATABASE" 2635 else: 2636 kind = None 2637 2638 if kind in ("DATABASE", "TABLE", "VIEW"): 2639 this = self._parse_table_parts() 2640 else: 2641 this = None 2642 2643 if self._match(TokenType.FOR): 2644 for_or_in = "FOR" 2645 elif self._match(TokenType.IN): 2646 for_or_in = "IN" 2647 else: 2648 for_or_in = None 2649 2650 if self._match_text_seq("ACCESS"): 2651 lock_type = "ACCESS" 2652 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2653 lock_type = "EXCLUSIVE" 2654 elif self._match_text_seq("SHARE"): 2655 lock_type = "SHARE" 2656 elif self._match_text_seq("READ"): 2657 lock_type = "READ" 2658 elif self._match_text_seq("WRITE"): 2659 lock_type = "WRITE" 2660 elif self._match_text_seq("CHECKSUM"): 2661 lock_type = "CHECKSUM" 2662 else: 2663 lock_type = None 2664 2665 override = self._match_text_seq("OVERRIDE") 2666 2667 return self.expression( 2668 exp.LockingProperty, 2669 this=this, 2670 kind=kind, 2671 for_or_in=for_or_in, 2672 lock_type=lock_type, 2673 override=override, 2674 ) 2675 2676 def _parse_partition_by(self) -> t.List[exp.Expression]: 2677 if self._match(TokenType.PARTITION_BY): 2678 return self._parse_csv(self._parse_assignment) 2679 return [] 2680 2681 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2682 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2683 if self._match_text_seq("MINVALUE"): 2684 return exp.var("MINVALUE") 2685 if self._match_text_seq("MAXVALUE"): 2686 return exp.var("MAXVALUE") 2687 return self._parse_bitwise() 2688 2689 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2690 expression = None 2691 from_expressions = None 2692 to_expressions = None 2693 2694 if self._match(TokenType.IN): 2695 this = self._parse_wrapped_csv(self._parse_bitwise) 2696 elif self._match(TokenType.FROM): 2697 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2698 self._match_text_seq("TO") 2699 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2700 elif self._match_text_seq("WITH", "(", "MODULUS"): 2701 this = self._parse_number() 2702 self._match_text_seq(",", "REMAINDER") 2703 expression = self._parse_number() 2704 self._match_r_paren() 2705 else: 2706 self.raise_error("Failed to parse partition bound spec.") 2707 2708 return self.expression( 2709 exp.PartitionBoundSpec, 2710 this=this, 2711 expression=expression, 2712 from_expressions=from_expressions, 2713 to_expressions=to_expressions, 2714 ) 2715 2716 # https://www.postgresql.org/docs/current/sql-createtable.html 2717 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2718 if not self._match_text_seq("OF"): 2719 self._retreat(self._index - 1) 2720 return None 2721 2722 this = self._parse_table(schema=True) 2723 2724 if self._match(TokenType.DEFAULT): 2725 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2726 elif self._match_text_seq("FOR", "VALUES"): 2727 expression = self._parse_partition_bound_spec() 2728 else: 2729 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2730 2731 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2732 2733 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2734 self._match(TokenType.EQ) 2735 return self.expression( 2736 exp.PartitionedByProperty, 2737 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2738 ) 2739 2740 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2741 if self._match_text_seq("AND", "STATISTICS"): 2742 statistics = True 2743 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2744 statistics = False 2745 else: 2746 statistics = None 2747 2748 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2749 2750 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2751 if self._match_text_seq("SQL"): 2752 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2753 return None 2754 2755 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2756 if self._match_text_seq("SQL", "DATA"): 2757 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2758 return None 2759 2760 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2761 if self._match_text_seq("PRIMARY", "INDEX"): 2762 return exp.NoPrimaryIndexProperty() 2763 if self._match_text_seq("SQL"): 2764 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2765 return None 2766 2767 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2768 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2769 return exp.OnCommitProperty() 2770 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2771 return exp.OnCommitProperty(delete=True) 2772 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2773 2774 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2775 if self._match_text_seq("SQL", "DATA"): 2776 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2777 return None 2778 2779 def _parse_distkey(self) -> exp.DistKeyProperty: 2780 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2781 2782 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2783 table = self._parse_table(schema=True) 2784 2785 options = [] 2786 while self._match_texts(("INCLUDING", "EXCLUDING")): 2787 this = self._prev.text.upper() 2788 2789 id_var = self._parse_id_var() 2790 if not id_var: 2791 return None 2792 2793 options.append( 2794 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2795 ) 2796 2797 return self.expression(exp.LikeProperty, this=table, expressions=options) 2798 2799 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2800 return self.expression( 2801 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2802 ) 2803 2804 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2805 self._match(TokenType.EQ) 2806 return self.expression( 2807 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2808 ) 2809 2810 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2811 self._match_text_seq("WITH", "CONNECTION") 2812 return self.expression( 2813 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2814 ) 2815 2816 def _parse_returns(self) -> exp.ReturnsProperty: 2817 value: t.Optional[exp.Expression] 2818 null = None 2819 is_table = self._match(TokenType.TABLE) 2820 2821 if is_table: 2822 if self._match(TokenType.LT): 2823 value = self.expression( 2824 exp.Schema, 2825 this="TABLE", 2826 expressions=self._parse_csv(self._parse_struct_types), 2827 ) 2828 if not self._match(TokenType.GT): 2829 self.raise_error("Expecting >") 2830 else: 2831 value = self._parse_schema(exp.var("TABLE")) 2832 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2833 null = True 2834 value = None 2835 else: 2836 value = self._parse_types() 2837 2838 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2839 2840 def _parse_describe(self) -> exp.Describe: 2841 kind = self._match_set(self.CREATABLES) and self._prev.text 2842 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2843 if self._match(TokenType.DOT): 2844 style = None 2845 self._retreat(self._index - 2) 2846 2847 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2848 2849 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2850 this = self._parse_statement() 2851 else: 2852 this = self._parse_table(schema=True) 2853 2854 properties = self._parse_properties() 2855 expressions = properties.expressions if properties else None 2856 partition = self._parse_partition() 2857 return self.expression( 2858 exp.Describe, 2859 this=this, 2860 style=style, 2861 kind=kind, 2862 expressions=expressions, 2863 partition=partition, 2864 format=format, 2865 ) 2866 2867 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2868 kind = self._prev.text.upper() 2869 expressions = [] 2870 2871 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2872 if self._match(TokenType.WHEN): 2873 expression = self._parse_disjunction() 2874 self._match(TokenType.THEN) 2875 else: 2876 expression = None 2877 2878 else_ = self._match(TokenType.ELSE) 2879 2880 if not self._match(TokenType.INTO): 2881 return None 2882 2883 return self.expression( 2884 exp.ConditionalInsert, 2885 this=self.expression( 2886 exp.Insert, 2887 this=self._parse_table(schema=True), 2888 expression=self._parse_derived_table_values(), 2889 ), 2890 expression=expression, 2891 else_=else_, 2892 ) 2893 2894 expression = parse_conditional_insert() 2895 while expression is not None: 2896 expressions.append(expression) 2897 expression = parse_conditional_insert() 2898 2899 return self.expression( 2900 exp.MultitableInserts, 2901 kind=kind, 2902 comments=comments, 2903 expressions=expressions, 2904 source=self._parse_table(), 2905 ) 2906 2907 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2908 comments = [] 2909 hint = self._parse_hint() 2910 overwrite = self._match(TokenType.OVERWRITE) 2911 ignore = self._match(TokenType.IGNORE) 2912 local = self._match_text_seq("LOCAL") 2913 alternative = None 2914 is_function = None 2915 2916 if self._match_text_seq("DIRECTORY"): 2917 this: t.Optional[exp.Expression] = self.expression( 2918 exp.Directory, 2919 this=self._parse_var_or_string(), 2920 local=local, 2921 row_format=self._parse_row_format(match_row=True), 2922 ) 2923 else: 2924 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2925 comments += ensure_list(self._prev_comments) 2926 return self._parse_multitable_inserts(comments) 2927 2928 if self._match(TokenType.OR): 2929 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2930 2931 self._match(TokenType.INTO) 2932 comments += ensure_list(self._prev_comments) 2933 self._match(TokenType.TABLE) 2934 is_function = self._match(TokenType.FUNCTION) 2935 2936 this = ( 2937 self._parse_table(schema=True, parse_partition=True) 2938 if not is_function 2939 else self._parse_function() 2940 ) 2941 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2942 this.set("alias", self._parse_table_alias()) 2943 2944 returning = self._parse_returning() 2945 2946 return self.expression( 2947 
exp.Insert, 2948 comments=comments, 2949 hint=hint, 2950 is_function=is_function, 2951 this=this, 2952 stored=self._match_text_seq("STORED") and self._parse_stored(), 2953 by_name=self._match_text_seq("BY", "NAME"), 2954 exists=self._parse_exists(), 2955 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2956 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2957 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2958 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2959 conflict=self._parse_on_conflict(), 2960 returning=returning or self._parse_returning(), 2961 overwrite=overwrite, 2962 alternative=alternative, 2963 ignore=ignore, 2964 source=self._match(TokenType.TABLE) and self._parse_table(), 2965 ) 2966 2967 def _parse_kill(self) -> exp.Kill: 2968 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2969 2970 return self.expression( 2971 exp.Kill, 2972 this=self._parse_primary(), 2973 kind=kind, 2974 ) 2975 2976 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2977 conflict = self._match_text_seq("ON", "CONFLICT") 2978 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2979 2980 if not conflict and not duplicate: 2981 return None 2982 2983 conflict_keys = None 2984 constraint = None 2985 2986 if conflict: 2987 if self._match_text_seq("ON", "CONSTRAINT"): 2988 constraint = self._parse_id_var() 2989 elif self._match(TokenType.L_PAREN): 2990 conflict_keys = self._parse_csv(self._parse_id_var) 2991 self._match_r_paren() 2992 2993 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2994 if self._prev.token_type == TokenType.UPDATE: 2995 self._match(TokenType.SET) 2996 expressions = self._parse_csv(self._parse_equality) 2997 else: 2998 expressions = None 2999 3000 return self.expression( 3001 exp.OnConflict, 3002 duplicate=duplicate, 3003 expressions=expressions, 3004 action=action, 3005 conflict_keys=conflict_keys, 3006 constraint=constraint, 3007 where=self._parse_where(), 3008 ) 3009 3010 def _parse_returning(self) -> t.Optional[exp.Returning]: 3011 if not self._match(TokenType.RETURNING): 3012 return None 3013 return self.expression( 3014 exp.Returning, 3015 expressions=self._parse_csv(self._parse_expression), 3016 into=self._match(TokenType.INTO) and self._parse_table_part(), 3017 ) 3018 3019 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3020 if not self._match(TokenType.FORMAT): 3021 return None 3022 return self._parse_row_format() 3023 3024 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3025 index = self._index 3026 with_ = with_ or self._match_text_seq("WITH") 3027 3028 if not self._match(TokenType.SERDE_PROPERTIES): 3029 self._retreat(index) 3030 return None 3031 return self.expression( 3032 exp.SerdeProperties, 3033 **{ # type: ignore 3034 "expressions": self._parse_wrapped_properties(), 3035 "with": with_, 3036 }, 3037 ) 3038 3039 def _parse_row_format( 3040 self, match_row: bool = False 3041 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3042 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3043 return None 3044 3045 if self._match_text_seq("SERDE"): 3046 this = self._parse_string() 3047 3048 serde_properties = self._parse_serde_properties() 3049 3050 return self.expression( 3051 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3052 ) 3053 3054 self._match_text_seq("DELIMITED") 3055 3056 kwargs = {} 3057 3058 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3059 kwargs["fields"] = self._parse_string() 3060 if self._match_text_seq("ESCAPED", "BY"): 3061 kwargs["escaped"] = self._parse_string() 3062 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3063 kwargs["collection_items"] = self._parse_string() 3064 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3065 kwargs["map_keys"] = self._parse_string() 3066 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3067 kwargs["lines"] = self._parse_string() 3068 if self._match_text_seq("NULL", "DEFINED", "AS"): 3069 kwargs["null"] = self._parse_string() 3070 3071 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3072 3073 def _parse_load(self) -> exp.LoadData | exp.Command: 3074 if self._match_text_seq("DATA"): 3075 local = self._match_text_seq("LOCAL") 3076 self._match_text_seq("INPATH") 3077 inpath = self._parse_string() 3078 overwrite = self._match(TokenType.OVERWRITE) 3079 self._match_pair(TokenType.INTO, TokenType.TABLE) 3080 3081 return self.expression( 3082 exp.LoadData, 3083 this=self._parse_table(schema=True), 3084 local=local, 3085 overwrite=overwrite, 3086 inpath=inpath, 3087 partition=self._parse_partition(), 3088 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3089 serde=self._match_text_seq("SERDE") and self._parse_string(), 3090 ) 3091 return self._parse_as_command(self._prev) 3092 3093 def _parse_delete(self) -> exp.Delete: 3094 # This handles MySQL's "Multiple-Table Syntax" 3095 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3096 tables = None 3097 if not self._match(TokenType.FROM, advance=False): 3098 tables = self._parse_csv(self._parse_table) or None 3099 3100 returning = self._parse_returning() 3101 3102 return self.expression( 3103 exp.Delete, 3104 tables=tables, 3105 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3106 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3107 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3108 where=self._parse_where(), 3109 returning=returning or self._parse_returning(), 3110 limit=self._parse_limit(), 3111 ) 3112 3113 def _parse_update(self) -> exp.Update: 3114 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3115 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3116 returning = self._parse_returning() 3117 return self.expression( 3118 exp.Update, 3119 **{ # type: ignore 3120 "this": this, 3121 "expressions": expressions, 3122 "from": self._parse_from(joins=True), 3123 "where": self._parse_where(), 3124 "returning": returning or self._parse_returning(), 3125 "order": self._parse_order(), 3126 "limit": self._parse_limit(), 3127 }, 3128 ) 3129 3130 def _parse_use(self) -> exp.Use: 3131 return self.expression( 3132 exp.Use, 3133 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3134 this=self._parse_table(schema=False), 3135 ) 3136 3137 def _parse_uncache(self) -> exp.Uncache: 3138 if not self._match(TokenType.TABLE): 3139 self.raise_error("Expecting TABLE after UNCACHE") 3140 3141 return self.expression( 3142 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3143 ) 3144 3145 def _parse_cache(self) -> exp.Cache: 3146 lazy = self._match_text_seq("LAZY") 3147 self._match(TokenType.TABLE) 3148 table = 
self._parse_table(schema=True) 3149 3150 options = [] 3151 if self._match_text_seq("OPTIONS"): 3152 self._match_l_paren() 3153 k = self._parse_string() 3154 self._match(TokenType.EQ) 3155 v = self._parse_string() 3156 options = [k, v] 3157 self._match_r_paren() 3158 3159 self._match(TokenType.ALIAS) 3160 return self.expression( 3161 exp.Cache, 3162 this=table, 3163 lazy=lazy, 3164 options=options, 3165 expression=self._parse_select(nested=True), 3166 ) 3167 3168 def _parse_partition(self) -> t.Optional[exp.Partition]: 3169 if not self._match_texts(self.PARTITION_KEYWORDS): 3170 return None 3171 3172 return self.expression( 3173 exp.Partition, 3174 subpartition=self._prev.text.upper() == "SUBPARTITION", 3175 expressions=self._parse_wrapped_csv(self._parse_assignment), 3176 ) 3177 3178 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3179 def _parse_value_expression() -> t.Optional[exp.Expression]: 3180 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3181 return exp.var(self._prev.text.upper()) 3182 return self._parse_expression() 3183 3184 if self._match(TokenType.L_PAREN): 3185 expressions = self._parse_csv(_parse_value_expression) 3186 self._match_r_paren() 3187 return self.expression(exp.Tuple, expressions=expressions) 3188 3189 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3190 expression = self._parse_expression() 3191 if expression: 3192 return self.expression(exp.Tuple, expressions=[expression]) 3193 return None 3194 3195 def _parse_projections(self) -> t.List[exp.Expression]: 3196 return self._parse_expressions() 3197 3198 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3199 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3200 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3201 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3202 ) 3203 elif self._match(TokenType.FROM): 3204 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3205 # Support parentheses for duckdb FROM-first syntax 3206 select = self._parse_select() 3207 if select: 3208 select.set("from", from_) 3209 this = select 3210 else: 3211 this = exp.select("*").from_(t.cast(exp.From, from_)) 3212 else: 3213 this = ( 3214 self._parse_table(consume_pipe=True) 3215 if table 3216 else self._parse_select(nested=True, parse_set_operation=False) 3217 ) 3218 3219 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3220 # in case a modifier (e.g. 
join) is following 3221 if table and isinstance(this, exp.Values) and this.alias: 3222 alias = this.args["alias"].pop() 3223 this = exp.Table(this=this, alias=alias) 3224 3225 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3226 3227 return this 3228 3229 def _parse_select( 3230 self, 3231 nested: bool = False, 3232 table: bool = False, 3233 parse_subquery_alias: bool = True, 3234 parse_set_operation: bool = True, 3235 consume_pipe: bool = True, 3236 ) -> t.Optional[exp.Expression]: 3237 query = self._parse_select_query( 3238 nested=nested, 3239 table=table, 3240 parse_subquery_alias=parse_subquery_alias, 3241 parse_set_operation=parse_set_operation, 3242 ) 3243 3244 if ( 3245 consume_pipe 3246 and self._match(TokenType.PIPE_GT, advance=False) 3247 and isinstance(query, exp.Query) 3248 ): 3249 query = self._parse_pipe_syntax_query(query) 3250 query = query.subquery(copy=False) if query and table else query 3251 3252 return query 3253 3254 def _parse_select_query( 3255 self, 3256 nested: bool = False, 3257 table: bool = False, 3258 parse_subquery_alias: bool = True, 3259 parse_set_operation: bool = True, 3260 ) -> t.Optional[exp.Expression]: 3261 cte = self._parse_with() 3262 3263 if cte: 3264 this = self._parse_statement() 3265 3266 if not this: 3267 self.raise_error("Failed to parse any statement following CTE") 3268 return cte 3269 3270 if "with" in this.arg_types: 3271 this.set("with", cte) 3272 else: 3273 self.raise_error(f"{this.key} does not support CTE") 3274 this = cte 3275 3276 return this 3277 3278 # duckdb supports leading with FROM x 3279 from_ = ( 3280 self._parse_from(consume_pipe=True) 3281 if self._match(TokenType.FROM, advance=False) 3282 else None 3283 ) 3284 3285 if self._match(TokenType.SELECT): 3286 comments = self._prev_comments 3287 3288 hint = self._parse_hint() 3289 3290 if self._next and not self._next.token_type == TokenType.DOT: 3291 all_ = self._match(TokenType.ALL) 3292 distinct = self._match_set(self.DISTINCT_TOKENS) 3293 else: 3294 all_, distinct = None, None 3295 3296 kind = ( 3297 self._match(TokenType.ALIAS) 3298 and self._match_texts(("STRUCT", "VALUE")) 3299 and self._prev.text.upper() 3300 ) 3301 3302 if distinct: 3303 distinct = self.expression( 3304 exp.Distinct, 3305 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3306 ) 3307 3308 if all_ and distinct: 3309 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3310 3311 operation_modifiers = [] 3312 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3313 operation_modifiers.append(exp.var(self._prev.text.upper())) 3314 3315 limit = self._parse_limit(top=True) 3316 projections = self._parse_projections() 3317 3318 this = self.expression( 3319 exp.Select, 3320 kind=kind, 3321 hint=hint, 3322 distinct=distinct, 3323 expressions=projections, 3324 limit=limit, 3325 operation_modifiers=operation_modifiers or None, 3326 ) 3327 this.comments = comments 3328 3329 into = self._parse_into() 3330 if into: 3331 this.set("into", into) 3332 3333 if not from_: 3334 from_ = self._parse_from() 3335 3336 if from_: 3337 this.set("from", from_) 3338 3339 this = self._parse_query_modifiers(this) 3340 elif (table or nested) and self._match(TokenType.L_PAREN): 3341 this = self._parse_wrapped_select(table=table) 3342 3343 # We return early here so that the UNION isn't attached to the subquery by the 3344 # following call to _parse_set_operations, but instead becomes the parent node 3345 self._match_r_paren() 3346 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3347 elif self._match(TokenType.VALUES, advance=False): 3348 this = self._parse_derived_table_values() 3349 elif from_: 3350 this = exp.select("*").from_(from_.this, copy=False) 3351 elif self._match(TokenType.SUMMARIZE): 3352 table = self._match(TokenType.TABLE) 3353 this = self._parse_select() or self._parse_string() or self._parse_table() 3354 return self.expression(exp.Summarize, this=this, table=table) 3355 elif self._match(TokenType.DESCRIBE): 3356 this = self._parse_describe() 3357 elif self._match_text_seq("STREAM"): 3358 this = self._parse_function() 3359 if this: 3360 this = self.expression(exp.Stream, this=this) 3361 else: 3362 self._retreat(self._index - 1) 3363 else: 3364 this = None 3365 3366 return self._parse_set_operations(this) if parse_set_operation else this 3367 3368 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3369 self._match_text_seq("SEARCH") 3370 3371 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3372 3373 if not kind: 3374 return None 3375 3376 self._match_text_seq("FIRST", "BY") 3377 3378 return self.expression( 3379 exp.RecursiveWithSearch, 3380 kind=kind, 3381 this=self._parse_id_var(), 3382 expression=self._match_text_seq("SET") and self._parse_id_var(), 3383 using=self._match_text_seq("USING") and self._parse_id_var(), 3384 ) 3385 3386 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3387 if not skip_with_token and not self._match(TokenType.WITH): 3388 return None 3389 3390 comments = self._prev_comments 3391 recursive = self._match(TokenType.RECURSIVE) 3392 3393 last_comments = None 3394 expressions = [] 3395 while True: 3396 cte = self._parse_cte() 3397 if isinstance(cte, exp.CTE): 3398 expressions.append(cte) 3399 if last_comments: 3400 cte.add_comments(last_comments) 3401 3402 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3403 break 3404 else: 3405 self._match(TokenType.WITH) 3406 3407 last_comments = self._prev_comments 3408 3409 return self.expression( 3410 exp.With, 3411 comments=comments, 3412 expressions=expressions, 3413 recursive=recursive, 3414 search=self._parse_recursive_with_search(), 3415 ) 3416 3417 def _parse_cte(self) -> t.Optional[exp.CTE]: 3418 index = self._index 3419 3420 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3421 if not alias or not alias.this: 3422 self.raise_error("Expected CTE to have alias") 3423 3424 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3425 self._retreat(index) 3426 return None 3427 3428 comments = self._prev_comments 3429 3430 if self._match_text_seq("NOT", "MATERIALIZED"): 3431 materialized = False 3432 elif self._match_text_seq("MATERIALIZED"): 3433 materialized = True 3434 else: 3435 materialized = None 3436 3437 cte = self.expression( 3438 exp.CTE, 3439 this=self._parse_wrapped(self._parse_statement), 3440 alias=alias, 3441 materialized=materialized, 3442 comments=comments, 3443 ) 3444 3445 values = cte.this 3446 if isinstance(values, exp.Values): 3447 if values.alias: 3448 cte.set("this", exp.select("*").from_(values)) 3449 else: 3450 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3451 3452 return cte 3453 3454 def _parse_table_alias( 3455 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3456 ) -> t.Optional[exp.TableAlias]: 3457 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3458 # so this section tries to parse the clause 
version and if it fails, it treats the token 3459 # as an identifier (alias) 3460 if self._can_parse_limit_or_offset(): 3461 return None 3462 3463 any_token = self._match(TokenType.ALIAS) 3464 alias = ( 3465 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3466 or self._parse_string_as_identifier() 3467 ) 3468 3469 index = self._index 3470 if self._match(TokenType.L_PAREN): 3471 columns = self._parse_csv(self._parse_function_parameter) 3472 self._match_r_paren() if columns else self._retreat(index) 3473 else: 3474 columns = None 3475 3476 if not alias and not columns: 3477 return None 3478 3479 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3480 3481 # We bubble up comments from the Identifier to the TableAlias 3482 if isinstance(alias, exp.Identifier): 3483 table_alias.add_comments(alias.pop_comments()) 3484 3485 return table_alias 3486 3487 def _parse_subquery( 3488 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3489 ) -> t.Optional[exp.Subquery]: 3490 if not this: 3491 return None 3492 3493 return self.expression( 3494 exp.Subquery, 3495 this=this, 3496 pivots=self._parse_pivots(), 3497 alias=self._parse_table_alias() if parse_alias else None, 3498 sample=self._parse_table_sample(), 3499 ) 3500 3501 def _implicit_unnests_to_explicit(self, this: E) -> E: 3502 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3503 3504 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3505 for i, join in enumerate(this.args.get("joins") or []): 3506 table = join.this 3507 normalized_table = table.copy() 3508 normalized_table.meta["maybe_column"] = True 3509 normalized_table = _norm(normalized_table, dialect=self.dialect) 3510 3511 if isinstance(table, exp.Table) and not join.args.get("on"): 3512 if normalized_table.parts[0].name in refs: 3513 table_as_column = table.to_column() 3514 unnest = exp.Unnest(expressions=[table_as_column]) 3515 3516 # Table.to_column creates a parent Alias node that we want to convert to 3517 # a TableAlias and attach to the Unnest, so it matches the parser's output 3518 if isinstance(table.args.get("alias"), exp.TableAlias): 3519 table_as_column.replace(table_as_column.this) 3520 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3521 3522 table.replace(unnest) 3523 3524 refs.add(normalized_table.alias_or_name) 3525 3526 return this 3527 3528 def _parse_query_modifiers( 3529 self, this: t.Optional[exp.Expression] 3530 ) -> t.Optional[exp.Expression]: 3531 if isinstance(this, self.MODIFIABLES): 3532 for join in self._parse_joins(): 3533 this.append("joins", join) 3534 for lateral in iter(self._parse_lateral, None): 3535 this.append("laterals", lateral) 3536 3537 while True: 3538 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3539 modifier_token = self._curr 3540 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3541 key, expression = parser(self) 3542 3543 if expression: 3544 if this.args.get(key): 3545 self.raise_error( 3546 f"Found multiple '{modifier_token.text.upper()}' clauses", 3547 token=modifier_token, 3548 ) 3549 3550 this.set(key, expression) 3551 if key == "limit": 3552 offset = expression.args.pop("offset", None) 3553 3554 if offset: 3555 offset = exp.Offset(expression=offset) 3556 this.set("offset", offset) 3557 3558 limit_by_expressions = expression.expressions 3559 expression.set("expressions", None) 3560 offset.set("expressions", limit_by_expressions) 3561 continue 
3562 break 3563 3564 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3565 this = self._implicit_unnests_to_explicit(this) 3566 3567 return this 3568 3569 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3570 start = self._curr 3571 while self._curr: 3572 self._advance() 3573 3574 end = self._tokens[self._index - 1] 3575 return exp.Hint(expressions=[self._find_sql(start, end)]) 3576 3577 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3578 return self._parse_function_call() 3579 3580 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3581 start_index = self._index 3582 should_fallback_to_string = False 3583 3584 hints = [] 3585 try: 3586 for hint in iter( 3587 lambda: self._parse_csv( 3588 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3589 ), 3590 [], 3591 ): 3592 hints.extend(hint) 3593 except ParseError: 3594 should_fallback_to_string = True 3595 3596 if should_fallback_to_string or self._curr: 3597 self._retreat(start_index) 3598 return self._parse_hint_fallback_to_string() 3599 3600 return self.expression(exp.Hint, expressions=hints) 3601 3602 def _parse_hint(self) -> t.Optional[exp.Hint]: 3603 if self._match(TokenType.HINT) and self._prev_comments: 3604 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3605 3606 return None 3607 3608 def _parse_into(self) -> t.Optional[exp.Into]: 3609 if not self._match(TokenType.INTO): 3610 return None 3611 3612 temp = self._match(TokenType.TEMPORARY) 3613 unlogged = self._match_text_seq("UNLOGGED") 3614 self._match(TokenType.TABLE) 3615 3616 return self.expression( 3617 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3618 ) 3619 3620 def _parse_from( 3621 self, 3622 joins: bool = False, 3623 skip_from_token: bool = False, 3624 consume_pipe: bool = False, 3625 ) -> t.Optional[exp.From]: 3626 if not skip_from_token and not self._match(TokenType.FROM): 3627 return None 3628 3629 return self.expression( 3630 exp.From, 3631 comments=self._prev_comments, 3632 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3633 ) 3634 3635 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3636 return self.expression( 3637 exp.MatchRecognizeMeasure, 3638 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3639 this=self._parse_expression(), 3640 ) 3641 3642 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3643 if not self._match(TokenType.MATCH_RECOGNIZE): 3644 return None 3645 3646 self._match_l_paren() 3647 3648 partition = self._parse_partition_by() 3649 order = self._parse_order() 3650 3651 measures = ( 3652 self._parse_csv(self._parse_match_recognize_measure) 3653 if self._match_text_seq("MEASURES") 3654 else None 3655 ) 3656 3657 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3658 rows = exp.var("ONE ROW PER MATCH") 3659 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3660 text = "ALL ROWS PER MATCH" 3661 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3662 text += " SHOW EMPTY MATCHES" 3663 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3664 text += " OMIT EMPTY MATCHES" 3665 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3666 text += " WITH UNMATCHED ROWS" 3667 rows = exp.var(text) 3668 else: 3669 rows = None 3670 3671 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3672 text = "AFTER MATCH SKIP" 3673 if self._match_text_seq("PAST", "LAST", "ROW"): 3674 text += " PAST LAST ROW" 3675 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3676 text += " TO NEXT ROW" 3677 elif self._match_text_seq("TO", "FIRST"): 3678 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3679 elif self._match_text_seq("TO", "LAST"): 3680 text += f" TO LAST {self._advance_any().text}" # type: ignore 3681 after = exp.var(text) 3682 else: 3683 after = None 3684 3685 if self._match_text_seq("PATTERN"): 3686 self._match_l_paren() 3687 3688 if not self._curr: 3689 self.raise_error("Expecting )", self._curr) 3690 3691 paren = 1 3692 start = self._curr 3693 3694 while self._curr and paren > 0: 3695 if self._curr.token_type == TokenType.L_PAREN: 3696 paren += 1 3697 if self._curr.token_type == TokenType.R_PAREN: 3698 paren -= 1 3699 3700 end = self._prev 3701 self._advance() 3702 3703 if paren > 0: 3704 self.raise_error("Expecting )", self._curr) 3705 3706 pattern = exp.var(self._find_sql(start, end)) 3707 else: 3708 pattern = None 3709 3710 define = ( 3711 self._parse_csv(self._parse_name_as_expression) 3712 if self._match_text_seq("DEFINE") 3713 else None 3714 ) 3715 3716 self._match_r_paren() 3717 3718 return self.expression( 3719 exp.MatchRecognize, 3720 partition_by=partition, 3721 order=order, 3722 measures=measures, 3723 rows=rows, 3724 after=after, 3725 pattern=pattern, 3726 define=define, 3727 alias=self._parse_table_alias(), 3728 ) 3729 3730 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3731 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3732 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3733 cross_apply = False 3734 3735 if cross_apply is not None: 3736 this = self._parse_select(table=True) 3737 view = None 3738 outer = None 3739 elif self._match(TokenType.LATERAL): 3740 this = self._parse_select(table=True) 3741 view = self._match(TokenType.VIEW) 3742 outer = self._match(TokenType.OUTER) 3743 else: 3744 return None 3745 3746 if not this: 3747 this = ( 3748 self._parse_unnest() 3749 or self._parse_function() 3750 or self._parse_id_var(any_token=False) 3751 ) 3752 3753 while self._match(TokenType.DOT): 3754 this = exp.Dot( 3755 this=this, 3756 expression=self._parse_function() or self._parse_id_var(any_token=False), 3757 ) 3758 3759 ordinality: t.Optional[bool] = None 3760 3761 if view: 3762 table = self._parse_id_var(any_token=False) 3763 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3764 table_alias: t.Optional[exp.TableAlias] = self.expression( 3765 exp.TableAlias, this=table, columns=columns 3766 ) 3767 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3768 # We move the alias from the lateral's child node to the lateral itself 3769 table_alias = this.args["alias"].pop() 3770 else: 3771 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3772 table_alias = self._parse_table_alias() 3773 3774 return self.expression( 3775 exp.Lateral, 3776 this=this, 3777 view=view, 3778 outer=outer, 3779 alias=table_alias, 3780 cross_apply=cross_apply, 3781 ordinality=ordinality, 3782 ) 3783 3784 def _parse_join_parts( 3785 self, 3786 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3787 return ( 3788 self._match_set(self.JOIN_METHODS) and self._prev, 3789 self._match_set(self.JOIN_SIDES) and self._prev, 3790 self._match_set(self.JOIN_KINDS) and self._prev, 3791 ) 3792 3793 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3794 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3795 this = self._parse_column() 3796 if 
isinstance(this, exp.Column): 3797 return this.this 3798 return this 3799 3800 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3801 3802 def _parse_join( 3803 self, skip_join_token: bool = False, parse_bracket: bool = False 3804 ) -> t.Optional[exp.Join]: 3805 if self._match(TokenType.COMMA): 3806 table = self._try_parse(self._parse_table) 3807 cross_join = self.expression(exp.Join, this=table) if table else None 3808 3809 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3810 cross_join.set("kind", "CROSS") 3811 3812 return cross_join 3813 3814 index = self._index 3815 method, side, kind = self._parse_join_parts() 3816 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3817 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3818 join_comments = self._prev_comments 3819 3820 if not skip_join_token and not join: 3821 self._retreat(index) 3822 kind = None 3823 method = None 3824 side = None 3825 3826 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3827 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3828 3829 if not skip_join_token and not join and not outer_apply and not cross_apply: 3830 return None 3831 3832 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3833 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3834 kwargs["expressions"] = self._parse_csv( 3835 lambda: self._parse_table(parse_bracket=parse_bracket) 3836 ) 3837 3838 if method: 3839 kwargs["method"] = method.text 3840 if side: 3841 kwargs["side"] = side.text 3842 if kind: 3843 kwargs["kind"] = kind.text 3844 if hint: 3845 kwargs["hint"] = hint 3846 3847 if self._match(TokenType.MATCH_CONDITION): 3848 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3849 3850 if self._match(TokenType.ON): 3851 kwargs["on"] = self._parse_assignment() 3852 elif self._match(TokenType.USING): 3853 kwargs["using"] = self._parse_using_identifiers() 3854 elif ( 3855 not method 3856 and not (outer_apply or cross_apply) 3857 and not isinstance(kwargs["this"], exp.Unnest) 3858 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3859 ): 3860 index = self._index 3861 joins: t.Optional[list] = list(self._parse_joins()) 3862 3863 if joins and self._match(TokenType.ON): 3864 kwargs["on"] = self._parse_assignment() 3865 elif joins and self._match(TokenType.USING): 3866 kwargs["using"] = self._parse_using_identifiers() 3867 else: 3868 joins = None 3869 self._retreat(index) 3870 3871 kwargs["this"].set("joins", joins if joins else None) 3872 3873 kwargs["pivots"] = self._parse_pivots() 3874 3875 comments = [c for token in (method, side, kind) if token for c in token.comments] 3876 comments = (join_comments or []) + comments 3877 return self.expression(exp.Join, comments=comments, **kwargs) 3878 3879 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3880 this = self._parse_assignment() 3881 3882 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3883 return this 3884 3885 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3886 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3887 3888 return this 3889 3890 def _parse_index_params(self) -> exp.IndexParameters: 3891 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3892 3893 if self._match(TokenType.L_PAREN, advance=False): 3894 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3895 else: 3896 columns = None 3897 3898 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3899 partition_by = self._parse_partition_by() 3900 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3901 tablespace = ( 3902 self._parse_var(any_token=True) 3903 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3904 else None 3905 ) 3906 where = self._parse_where() 3907 3908 on = self._parse_field() if self._match(TokenType.ON) else None 3909 3910 return self.expression( 3911 exp.IndexParameters, 3912 using=using, 3913 columns=columns, 3914 include=include, 3915 partition_by=partition_by, 3916 where=where, 3917 with_storage=with_storage, 3918 tablespace=tablespace, 3919 on=on, 3920 ) 3921 3922 def _parse_index( 3923 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3924 ) -> t.Optional[exp.Index]: 3925 if index or anonymous: 3926 unique = None 3927 primary = None 3928 amp = None 3929 3930 self._match(TokenType.ON) 3931 self._match(TokenType.TABLE) # hive 3932 table = self._parse_table_parts(schema=True) 3933 else: 3934 unique = self._match(TokenType.UNIQUE) 3935 primary = self._match_text_seq("PRIMARY") 3936 amp = self._match_text_seq("AMP") 3937 3938 if not self._match(TokenType.INDEX): 3939 return None 3940 3941 index = self._parse_id_var() 3942 table = None 3943 3944 params = self._parse_index_params() 3945 3946 return self.expression( 3947 exp.Index, 3948 this=index, 3949 table=table, 3950 unique=unique, 3951 primary=primary, 3952 amp=amp, 3953 params=params, 3954 ) 3955 3956 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3957 hints: t.List[exp.Expression] = [] 3958 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3959 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3960 hints.append( 3961 self.expression( 3962 exp.WithTableHint, 3963 expressions=self._parse_csv( 3964 lambda: self._parse_function() or self._parse_var(any_token=True) 3965 ), 3966 ) 3967 ) 3968 self._match_r_paren() 3969 else: 3970 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3971 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3972 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3973 3974 self._match_set((TokenType.INDEX, TokenType.KEY)) 3975 if self._match(TokenType.FOR): 3976 hint.set("target", self._advance_any() and self._prev.text.upper()) 3977 3978 hint.set("expressions", self._parse_wrapped_id_vars()) 3979 hints.append(hint) 3980 3981 return hints or None 3982 3983 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3984 return ( 3985 (not schema and self._parse_function(optional_parens=False)) 3986 or self._parse_id_var(any_token=False) 3987 or self._parse_string_as_identifier() 3988 or self._parse_placeholder() 3989 ) 3990 3991 def _parse_table_parts( 3992 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3993 ) -> exp.Table: 3994 catalog = None 3995 db = None 3996 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3997 3998 while self._match(TokenType.DOT): 3999 if catalog: 4000 # This allows nesting the table in arbitrarily many dot expressions if needed 4001 table = self.expression( 4002 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4003 ) 4004 else: 4005 catalog = db 4006 db = table 4007 # "" used for tsql FROM a..b case 4008 table = 
self._parse_table_part(schema=schema) or "" 4009 4010 if ( 4011 wildcard 4012 and self._is_connected() 4013 and (isinstance(table, exp.Identifier) or not table) 4014 and self._match(TokenType.STAR) 4015 ): 4016 if isinstance(table, exp.Identifier): 4017 table.args["this"] += "*" 4018 else: 4019 table = exp.Identifier(this="*") 4020 4021 # We bubble up comments from the Identifier to the Table 4022 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4023 4024 if is_db_reference: 4025 catalog = db 4026 db = table 4027 table = None 4028 4029 if not table and not is_db_reference: 4030 self.raise_error(f"Expected table name but got {self._curr}") 4031 if not db and is_db_reference: 4032 self.raise_error(f"Expected database name but got {self._curr}") 4033 4034 table = self.expression( 4035 exp.Table, 4036 comments=comments, 4037 this=table, 4038 db=db, 4039 catalog=catalog, 4040 ) 4041 4042 changes = self._parse_changes() 4043 if changes: 4044 table.set("changes", changes) 4045 4046 at_before = self._parse_historical_data() 4047 if at_before: 4048 table.set("when", at_before) 4049 4050 pivots = self._parse_pivots() 4051 if pivots: 4052 table.set("pivots", pivots) 4053 4054 return table 4055 4056 def _parse_table( 4057 self, 4058 schema: bool = False, 4059 joins: bool = False, 4060 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4061 parse_bracket: bool = False, 4062 is_db_reference: bool = False, 4063 parse_partition: bool = False, 4064 consume_pipe: bool = False, 4065 ) -> t.Optional[exp.Expression]: 4066 lateral = self._parse_lateral() 4067 if lateral: 4068 return lateral 4069 4070 unnest = self._parse_unnest() 4071 if unnest: 4072 return unnest 4073 4074 values = self._parse_derived_table_values() 4075 if values: 4076 return values 4077 4078 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4079 if subquery: 4080 if not subquery.args.get("pivots"): 4081 subquery.set("pivots", self._parse_pivots()) 4082 return subquery 4083 4084 bracket = parse_bracket and self._parse_bracket(None) 4085 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4086 4087 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4088 self._parse_table 4089 ) 4090 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4091 4092 only = self._match(TokenType.ONLY) 4093 4094 this = t.cast( 4095 exp.Expression, 4096 bracket 4097 or rows_from 4098 or self._parse_bracket( 4099 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4100 ), 4101 ) 4102 4103 if only: 4104 this.set("only", only) 4105 4106 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4107 self._match_text_seq("*") 4108 4109 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4110 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4111 this.set("partition", self._parse_partition()) 4112 4113 if schema: 4114 return self._parse_schema(this=this) 4115 4116 version = self._parse_version() 4117 4118 if version: 4119 this.set("version", version) 4120 4121 if self.dialect.ALIAS_POST_TABLESAMPLE: 4122 this.set("sample", self._parse_table_sample()) 4123 4124 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4125 if alias: 4126 this.set("alias", alias) 4127 4128 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4129 return self.expression( 4130 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4131 ) 4132 4133 this.set("hints", self._parse_table_hints()) 4134 4135 if not this.args.get("pivots"): 4136 this.set("pivots", self._parse_pivots()) 4137 4138 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4139 this.set("sample", self._parse_table_sample()) 4140 4141 if joins: 4142 for join in self._parse_joins(): 4143 this.append("joins", join) 4144 4145 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4146 this.set("ordinality", True) 4147 this.set("alias", self._parse_table_alias()) 4148 4149 return this 4150 4151 def _parse_version(self) -> t.Optional[exp.Version]: 4152 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4153 this = "TIMESTAMP" 4154 elif self._match(TokenType.VERSION_SNAPSHOT): 4155 this = "VERSION" 4156 else: 4157 return None 4158 4159 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4160 kind = self._prev.text.upper() 4161 start = self._parse_bitwise() 4162 self._match_texts(("TO", "AND")) 4163 end = self._parse_bitwise() 4164 expression: t.Optional[exp.Expression] = self.expression( 4165 exp.Tuple, expressions=[start, end] 4166 ) 4167 elif self._match_text_seq("CONTAINED", "IN"): 4168 kind = "CONTAINED IN" 4169 expression = self.expression( 4170 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4171 ) 4172 elif self._match(TokenType.ALL): 4173 kind = "ALL" 4174 expression = None 4175 else: 4176 self._match_text_seq("AS", "OF") 4177 kind = "AS OF" 4178 expression = self._parse_type() 4179 4180 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4181 4182 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4183 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4184 index = self._index 4185 historical_data = None 4186 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4187 this = self._prev.text.upper() 4188 kind = ( 4189 self._match(TokenType.L_PAREN) 4190 and self._match_texts(self.HISTORICAL_DATA_KIND) 4191 and self._prev.text.upper() 4192 ) 4193 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4194 4195 if expression: 4196 self._match_r_paren() 4197 historical_data = self.expression( 4198 exp.HistoricalData, this=this, kind=kind, expression=expression 4199 ) 4200 else: 4201 self._retreat(index) 4202 4203 return historical_data 4204 4205 def _parse_changes(self) -> t.Optional[exp.Changes]: 4206 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4207 return None 4208 4209 information = self._parse_var(any_token=True) 4210 self._match_r_paren() 4211 4212 return self.expression( 4213 exp.Changes, 4214 information=information, 4215 at_before=self._parse_historical_data(), 4216 end=self._parse_historical_data(), 4217 ) 4218 4219 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4220 if not self._match(TokenType.UNNEST): 4221 return None 4222 4223 expressions = self._parse_wrapped_csv(self._parse_equality) 4224 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4225 4226 alias = self._parse_table_alias() if with_alias else None 4227 4228 if alias: 4229 if self.dialect.UNNEST_COLUMN_ONLY: 4230 if alias.args.get("columns"): 4231 self.raise_error("Unexpected extra column alias in unnest.") 4232 4233 alias.set("columns", [alias.this]) 4234 alias.set("this", None) 4235 4236 columns = alias.args.get("columns") or [] 4237 if offset and len(expressions) < len(columns): 4238 offset = columns.pop() 4239 4240 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4241 
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
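    # Illustrative sketch of the DuckDB-style simplified PIVOT handled by the method below.
    # It only uses sqlglot's public entry points (parse_one and the exp module); the exact
    # tree shape and dialect wiring are assumptions, so treat this as a sketch rather than a
    # guaranteed contract:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     tree = sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
    #     assert tree.find(exp.Pivot) is not None  # the ON/USING clauses land on an exp.Pivot node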
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4349 def _parse_on() -> t.Optional[exp.Expression]: 4350 this = self._parse_bitwise() 4351 4352 if self._match(TokenType.IN): 4353 # PIVOT ... ON col IN (row_val1, row_val2) 4354 return self._parse_in(this) 4355 if self._match(TokenType.ALIAS, advance=False): 4356 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4357 return self._parse_alias(this) 4358 4359 return this 4360 4361 this = self._parse_table() 4362 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4363 into = self._parse_unpivot_columns() 4364 using = self._match(TokenType.USING) and self._parse_csv( 4365 lambda: self._parse_alias(self._parse_function()) 4366 ) 4367 group = self._parse_group() 4368 4369 return self.expression( 4370 exp.Pivot, 4371 this=this, 4372 expressions=expressions, 4373 using=using, 4374 group=group, 4375 unpivot=is_unpivot, 4376 into=into, 4377 ) 4378 4379 def _parse_pivot_in(self) -> exp.In: 4380 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4381 this = self._parse_select_or_expression() 4382 4383 self._match(TokenType.ALIAS) 4384 alias = self._parse_bitwise() 4385 if alias: 4386 if isinstance(alias, exp.Column) and not alias.db: 4387 alias = alias.this 4388 return self.expression(exp.PivotAlias, this=this, alias=alias) 4389 4390 return this 4391 4392 value = self._parse_column() 4393 4394 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4395 self.raise_error("Expecting IN (") 4396 4397 if self._match(TokenType.ANY): 4398 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4399 else: 4400 exprs = self._parse_csv(_parse_aliased_expression) 4401 4402 self._match_r_paren() 4403 return self.expression(exp.In, this=value, expressions=exprs) 4404 4405 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4406 func = self._parse_function() 4407 if not func: 4408 self.raise_error("Expecting an aggregation function in PIVOT") 4409 4410 return self._parse_alias(func) 4411 4412 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4413 index = self._index 4414 include_nulls = None 4415 4416 if self._match(TokenType.PIVOT): 4417 unpivot = False 4418 elif self._match(TokenType.UNPIVOT): 4419 unpivot = True 4420 4421 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4422 if self._match_text_seq("INCLUDE", "NULLS"): 4423 include_nulls = True 4424 elif self._match_text_seq("EXCLUDE", "NULLS"): 4425 include_nulls = False 4426 else: 4427 return None 4428 4429 expressions = [] 4430 4431 if not self._match(TokenType.L_PAREN): 4432 self._retreat(index) 4433 return None 4434 4435 if unpivot: 4436 expressions = self._parse_csv(self._parse_column) 4437 else: 4438 expressions = self._parse_csv(self._parse_pivot_aggregation) 4439 4440 if not expressions: 4441 self.raise_error("Failed to parse PIVOT's aggregation list") 4442 4443 if not self._match(TokenType.FOR): 4444 self.raise_error("Expecting FOR") 4445 4446 fields = [] 4447 while True: 4448 field = self._try_parse(self._parse_pivot_in) 4449 if not field: 4450 break 4451 fields.append(field) 4452 4453 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4454 self._parse_bitwise 4455 ) 4456 4457 group = self._parse_group() 4458 4459 self._match_r_paren() 4460 4461 pivot = self.expression( 4462 exp.Pivot, 4463 expressions=expressions, 4464 fields=fields, 4465 unpivot=unpivot, 4466 include_nulls=include_nulls, 4467 
default_on_null=default_on_null, 4468 group=group, 4469 ) 4470 4471 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4472 pivot.set("alias", self._parse_table_alias()) 4473 4474 if not unpivot: 4475 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4476 4477 columns: t.List[exp.Expression] = [] 4478 all_fields = [] 4479 for pivot_field in pivot.fields: 4480 pivot_field_expressions = pivot_field.expressions 4481 4482 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4483 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4484 continue 4485 4486 all_fields.append( 4487 [ 4488 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4489 for fld in pivot_field_expressions 4490 ] 4491 ) 4492 4493 if all_fields: 4494 if names: 4495 all_fields.append(names) 4496 4497 # Generate all possible combinations of the pivot columns 4498 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4499 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4500 for fld_parts_tuple in itertools.product(*all_fields): 4501 fld_parts = list(fld_parts_tuple) 4502 4503 if names and self.PREFIXED_PIVOT_COLUMNS: 4504 # Move the "name" to the front of the list 4505 fld_parts.insert(0, fld_parts.pop(-1)) 4506 4507 columns.append(exp.to_identifier("_".join(fld_parts))) 4508 4509 pivot.set("columns", columns) 4510 4511 return pivot 4512 4513 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4514 return [agg.alias for agg in aggregations if agg.alias] 4515 4516 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4517 if not skip_where_token and not self._match(TokenType.PREWHERE): 4518 return None 4519 4520 return self.expression( 4521 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4522 ) 4523 4524 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4525 if not skip_where_token and not self._match(TokenType.WHERE): 4526 return None 4527 4528 return self.expression( 4529 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4530 ) 4531 4532 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4533 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4534 return None 4535 comments = self._prev_comments 4536 4537 elements: t.Dict[str, t.Any] = defaultdict(list) 4538 4539 if self._match(TokenType.ALL): 4540 elements["all"] = True 4541 elif self._match(TokenType.DISTINCT): 4542 elements["all"] = False 4543 4544 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4545 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4546 4547 while True: 4548 index = self._index 4549 4550 elements["expressions"].extend( 4551 self._parse_csv( 4552 lambda: None 4553 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4554 else self._parse_assignment() 4555 ) 4556 ) 4557 4558 before_with_index = self._index 4559 with_prefix = self._match(TokenType.WITH) 4560 4561 if self._match(TokenType.ROLLUP): 4562 elements["rollup"].append( 4563 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4564 ) 4565 elif self._match(TokenType.CUBE): 4566 elements["cube"].append( 4567 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4568 ) 4569 elif self._match(TokenType.GROUPING_SETS): 4570 elements["grouping_sets"].append( 4571 
self.expression( 4572 exp.GroupingSets, 4573 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4574 ) 4575 ) 4576 elif self._match_text_seq("TOTALS"): 4577 elements["totals"] = True # type: ignore 4578 4579 if before_with_index <= self._index <= before_with_index + 1: 4580 self._retreat(before_with_index) 4581 break 4582 4583 if index == self._index: 4584 break 4585 4586 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4587 4588 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4589 return self.expression( 4590 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4591 ) 4592 4593 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4594 if self._match(TokenType.L_PAREN): 4595 grouping_set = self._parse_csv(self._parse_column) 4596 self._match_r_paren() 4597 return self.expression(exp.Tuple, expressions=grouping_set) 4598 4599 return self._parse_column() 4600 4601 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4602 if not skip_having_token and not self._match(TokenType.HAVING): 4603 return None 4604 return self.expression( 4605 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4606 ) 4607 4608 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4609 if not self._match(TokenType.QUALIFY): 4610 return None 4611 return self.expression(exp.Qualify, this=self._parse_assignment()) 4612 4613 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4614 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4615 exp.Prior, this=self._parse_bitwise() 4616 ) 4617 connect = self._parse_assignment() 4618 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4619 return connect 4620 4621 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4622 if skip_start_token: 4623 start = None 4624 elif self._match(TokenType.START_WITH): 4625 start = self._parse_assignment() 4626 else: 4627 return None 4628 4629 self._match(TokenType.CONNECT_BY) 4630 nocycle = self._match_text_seq("NOCYCLE") 4631 connect = self._parse_connect_with_prior() 4632 4633 if not start and self._match(TokenType.START_WITH): 4634 start = self._parse_assignment() 4635 4636 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4637 4638 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4639 this = self._parse_id_var(any_token=True) 4640 if self._match(TokenType.ALIAS): 4641 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4642 return this 4643 4644 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4645 if self._match_text_seq("INTERPOLATE"): 4646 return self._parse_wrapped_csv(self._parse_name_as_expression) 4647 return None 4648 4649 def _parse_order( 4650 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4651 ) -> t.Optional[exp.Expression]: 4652 siblings = None 4653 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4654 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4655 return this 4656 4657 siblings = True 4658 4659 return self.expression( 4660 exp.Order, 4661 comments=self._prev_comments, 4662 this=this, 4663 expressions=self._parse_csv(self._parse_ordered), 4664 siblings=siblings, 4665 ) 4666 4667 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4668 if not self._match(token): 4669 return None 4670 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 4671 4672 def _parse_ordered( 4673 self, parse_method: t.Optional[t.Callable] = None 4674 ) -> t.Optional[exp.Ordered]: 4675 this = parse_method() if parse_method else self._parse_assignment() 4676 if not this: 4677 return None 4678 4679 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4680 this = exp.var("ALL") 4681 4682 asc = self._match(TokenType.ASC) 4683 desc = self._match(TokenType.DESC) or (asc and False) 4684 4685 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4686 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4687 4688 nulls_first = is_nulls_first or False 4689 explicitly_null_ordered = is_nulls_first or is_nulls_last 4690 4691 if ( 4692 not explicitly_null_ordered 4693 and ( 4694 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4695 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4696 ) 4697 and self.dialect.NULL_ORDERING != "nulls_are_last" 4698 ): 4699 nulls_first = True 4700 4701 if self._match_text_seq("WITH", "FILL"): 4702 with_fill = self.expression( 4703 exp.WithFill, 4704 **{ # type: ignore 4705 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4706 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4707 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4708 "interpolate": self._parse_interpolate(), 4709 }, 4710 ) 4711 else: 4712 with_fill = None 4713 4714 return self.expression( 4715 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4716 ) 4717 4718 def _parse_limit_options(self) -> exp.LimitOptions: 4719 percent = self._match(TokenType.PERCENT) 4720 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4721 self._match_text_seq("ONLY") 4722 with_ties = self._match_text_seq("WITH", "TIES") 4723 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4724 4725 def _parse_limit( 4726 self, 4727 this: t.Optional[exp.Expression] = None, 4728 top: bool = False, 4729 skip_limit_token: bool = False, 4730 ) -> t.Optional[exp.Expression]: 4731 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4732 comments = self._prev_comments 4733 if top: 4734 limit_paren = self._match(TokenType.L_PAREN) 4735 expression = self._parse_term() if limit_paren else self._parse_number() 4736 4737 if limit_paren: 4738 self._match_r_paren() 4739 4740 limit_options = self._parse_limit_options() 4741 else: 4742 limit_options = None 4743 expression = self._parse_term() 4744 4745 if self._match(TokenType.COMMA): 4746 offset = expression 4747 expression = self._parse_term() 4748 else: 4749 offset = None 4750 4751 limit_exp = self.expression( 4752 exp.Limit, 4753 this=this, 4754 expression=expression, 4755 offset=offset, 4756 comments=comments, 4757 limit_options=limit_options, 4758 expressions=self._parse_limit_by(), 4759 ) 4760 4761 return limit_exp 4762 4763 if self._match(TokenType.FETCH): 4764 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4765 direction = self._prev.text.upper() if direction else "FIRST" 4766 4767 count = self._parse_field(tokens=self.FETCH_TOKENS) 4768 4769 return self.expression( 4770 exp.Fetch, 4771 direction=direction, 4772 count=count, 4773 limit_options=self._parse_limit_options(), 4774 ) 4775 4776 return this 4777 4778 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4779 if not self._match(TokenType.OFFSET): 4780 return this 4781 4782 count = self._parse_term() 4783 
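        # The count may be followed by a ROW / ROWS noise keyword (e.g. "OFFSET 5 ROWS"),
        # which is consumed here but not stored on the resulting exp.Offset node.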
self._match_set((TokenType.ROW, TokenType.ROWS)) 4784 4785 return self.expression( 4786 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4787 ) 4788 4789 def _can_parse_limit_or_offset(self) -> bool: 4790 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4791 return False 4792 4793 index = self._index 4794 result = bool( 4795 self._try_parse(self._parse_limit, retreat=True) 4796 or self._try_parse(self._parse_offset, retreat=True) 4797 ) 4798 self._retreat(index) 4799 return result 4800 4801 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4802 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4803 4804 def _parse_locks(self) -> t.List[exp.Lock]: 4805 locks = [] 4806 while True: 4807 update, key = None, None 4808 if self._match_text_seq("FOR", "UPDATE"): 4809 update = True 4810 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4811 "LOCK", "IN", "SHARE", "MODE" 4812 ): 4813 update = False 4814 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4815 update, key = False, True 4816 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4817 update, key = True, True 4818 else: 4819 break 4820 4821 expressions = None 4822 if self._match_text_seq("OF"): 4823 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4824 4825 wait: t.Optional[bool | exp.Expression] = None 4826 if self._match_text_seq("NOWAIT"): 4827 wait = True 4828 elif self._match_text_seq("WAIT"): 4829 wait = self._parse_primary() 4830 elif self._match_text_seq("SKIP", "LOCKED"): 4831 wait = False 4832 4833 locks.append( 4834 self.expression( 4835 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4836 ) 4837 ) 4838 4839 return locks 4840 4841 def parse_set_operation( 4842 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4843 ) -> t.Optional[exp.Expression]: 4844 start = self._index 4845 _, side_token, kind_token = self._parse_join_parts() 4846 4847 side = side_token.text if side_token else None 4848 kind = kind_token.text if kind_token else None 4849 4850 if not self._match_set(self.SET_OPERATIONS): 4851 self._retreat(start) 4852 return None 4853 4854 token_type = self._prev.token_type 4855 4856 if token_type == TokenType.UNION: 4857 operation: t.Type[exp.SetOperation] = exp.Union 4858 elif token_type == TokenType.EXCEPT: 4859 operation = exp.Except 4860 else: 4861 operation = exp.Intersect 4862 4863 comments = self._prev.comments 4864 4865 if self._match(TokenType.DISTINCT): 4866 distinct: t.Optional[bool] = True 4867 elif self._match(TokenType.ALL): 4868 distinct = False 4869 else: 4870 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4871 if distinct is None: 4872 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4873 4874 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4875 "STRICT", "CORRESPONDING" 4876 ) 4877 if self._match_text_seq("CORRESPONDING"): 4878 by_name = True 4879 if not side and not kind: 4880 kind = "INNER" 4881 4882 on_column_list = None 4883 if by_name and self._match_texts(("ON", "BY")): 4884 on_column_list = self._parse_wrapped_csv(self._parse_column) 4885 4886 expression = self._parse_select( 4887 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4888 ) 4889 4890 return self.expression( 4891 operation, 4892 comments=comments, 4893 this=this, 4894 distinct=distinct, 4895 by_name=by_name, 4896 expression=expression, 4897 side=side, 4898 kind=kind, 4899 on=on_column_list, 4900 ) 4901 
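    # Illustrative sketch of how parse_set_operation above resolves an implicit DISTINCT/ALL.
    # Only sqlglot's public API is used; the asserted default is assumed to hold for the generic
    # dialect, whose SET_OP_DISTINCT_BY_DEFAULT treats a bare UNION as DISTINCT (other dialects
    # may differ):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #     assert isinstance(union, exp.Union)
    #     assert union.args.get("distinct") is True  # implicit DISTINCT filled in by the parser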
4902 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4903 while this: 4904 setop = self.parse_set_operation(this) 4905 if not setop: 4906 break 4907 this = setop 4908 4909 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4910 expression = this.expression 4911 4912 if expression: 4913 for arg in self.SET_OP_MODIFIERS: 4914 expr = expression.args.get(arg) 4915 if expr: 4916 this.set(arg, expr.pop()) 4917 4918 return this 4919 4920 def _parse_expression(self) -> t.Optional[exp.Expression]: 4921 return self._parse_alias(self._parse_assignment()) 4922 4923 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4924 this = self._parse_disjunction() 4925 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4926 # This allows us to parse <non-identifier token> := <expr> 4927 this = exp.column( 4928 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4929 ) 4930 4931 while self._match_set(self.ASSIGNMENT): 4932 if isinstance(this, exp.Column) and len(this.parts) == 1: 4933 this = this.this 4934 4935 this = self.expression( 4936 self.ASSIGNMENT[self._prev.token_type], 4937 this=this, 4938 comments=self._prev_comments, 4939 expression=self._parse_assignment(), 4940 ) 4941 4942 return this 4943 4944 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4945 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4946 4947 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4948 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4949 4950 def _parse_equality(self) -> t.Optional[exp.Expression]: 4951 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4952 4953 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4954 return self._parse_tokens(self._parse_range, self.COMPARISON) 4955 4956 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4957 this = this or self._parse_bitwise() 4958 negate = self._match(TokenType.NOT) 4959 4960 if self._match_set(self.RANGE_PARSERS): 4961 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4962 if not expression: 4963 return this 4964 4965 this = expression 4966 elif self._match(TokenType.ISNULL): 4967 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4968 4969 # Postgres supports ISNULL and NOTNULL for conditions. 
4970 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4971 if self._match(TokenType.NOTNULL): 4972 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4973 this = self.expression(exp.Not, this=this) 4974 4975 if negate: 4976 this = self._negate_range(this) 4977 4978 if self._match(TokenType.IS): 4979 this = self._parse_is(this) 4980 4981 return this 4982 4983 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4984 if not this: 4985 return this 4986 4987 return self.expression(exp.Not, this=this) 4988 4989 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4990 index = self._index - 1 4991 negate = self._match(TokenType.NOT) 4992 4993 if self._match_text_seq("DISTINCT", "FROM"): 4994 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4995 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4996 4997 if self._match(TokenType.JSON): 4998 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4999 5000 if self._match_text_seq("WITH"): 5001 _with = True 5002 elif self._match_text_seq("WITHOUT"): 5003 _with = False 5004 else: 5005 _with = None 5006 5007 unique = self._match(TokenType.UNIQUE) 5008 self._match_text_seq("KEYS") 5009 expression: t.Optional[exp.Expression] = self.expression( 5010 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5011 ) 5012 else: 5013 expression = self._parse_primary() or self._parse_null() 5014 if not expression: 5015 self._retreat(index) 5016 return None 5017 5018 this = self.expression(exp.Is, this=this, expression=expression) 5019 return self.expression(exp.Not, this=this) if negate else this 5020 5021 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5022 unnest = self._parse_unnest(with_alias=False) 5023 if unnest: 5024 this = self.expression(exp.In, this=this, unnest=unnest) 5025 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5026 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5027 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5028 5029 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5030 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5031 else: 5032 this = self.expression(exp.In, this=this, expressions=expressions) 5033 5034 if matched_l_paren: 5035 self._match_r_paren(this) 5036 elif not self._match(TokenType.R_BRACKET, expression=this): 5037 self.raise_error("Expecting ]") 5038 else: 5039 this = self.expression(exp.In, this=this, field=self._parse_column()) 5040 5041 return this 5042 5043 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5044 symmetric = None 5045 if self._match_text_seq("SYMMETRIC"): 5046 symmetric = True 5047 elif self._match_text_seq("ASYMMETRIC"): 5048 symmetric = False 5049 5050 low = self._parse_bitwise() 5051 self._match(TokenType.AND) 5052 high = self._parse_bitwise() 5053 5054 return self.expression( 5055 exp.Between, 5056 this=this, 5057 low=low, 5058 high=high, 5059 symmetric=symmetric, 5060 ) 5061 5062 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5063 if not self._match(TokenType.ESCAPE): 5064 return this 5065 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5066 5067 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5068 index = self._index 5069 5070 if not 
self._match(TokenType.INTERVAL) and match_interval: 5071 return None 5072 5073 if self._match(TokenType.STRING, advance=False): 5074 this = self._parse_primary() 5075 else: 5076 this = self._parse_term() 5077 5078 if not this or ( 5079 isinstance(this, exp.Column) 5080 and not this.table 5081 and not this.this.quoted 5082 and this.name.upper() == "IS" 5083 ): 5084 self._retreat(index) 5085 return None 5086 5087 unit = self._parse_function() or ( 5088 not self._match(TokenType.ALIAS, advance=False) 5089 and self._parse_var(any_token=True, upper=True) 5090 ) 5091 5092 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5093 # each INTERVAL expression into this canonical form so it's easy to transpile 5094 if this and this.is_number: 5095 this = exp.Literal.string(this.to_py()) 5096 elif this and this.is_string: 5097 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5098 if parts and unit: 5099 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5100 unit = None 5101 self._retreat(self._index - 1) 5102 5103 if len(parts) == 1: 5104 this = exp.Literal.string(parts[0][0]) 5105 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5106 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5107 unit = self.expression( 5108 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5109 ) 5110 5111 interval = self.expression(exp.Interval, this=this, unit=unit) 5112 5113 index = self._index 5114 self._match(TokenType.PLUS) 5115 5116 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5117 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5118 return self.expression( 5119 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5120 ) 5121 5122 self._retreat(index) 5123 return interval 5124 5125 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5126 this = self._parse_term() 5127 5128 while True: 5129 if self._match_set(self.BITWISE): 5130 this = self.expression( 5131 self.BITWISE[self._prev.token_type], 5132 this=this, 5133 expression=self._parse_term(), 5134 ) 5135 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5136 this = self.expression( 5137 exp.DPipe, 5138 this=this, 5139 expression=self._parse_term(), 5140 safe=not self.dialect.STRICT_STRING_CONCAT, 5141 ) 5142 elif self._match(TokenType.DQMARK): 5143 this = self.expression( 5144 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5145 ) 5146 elif self._match_pair(TokenType.LT, TokenType.LT): 5147 this = self.expression( 5148 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5149 ) 5150 elif self._match_pair(TokenType.GT, TokenType.GT): 5151 this = self.expression( 5152 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5153 ) 5154 else: 5155 break 5156 5157 return this 5158 5159 def _parse_term(self) -> t.Optional[exp.Expression]: 5160 this = self._parse_factor() 5161 5162 while self._match_set(self.TERM): 5163 klass = self.TERM[self._prev.token_type] 5164 comments = self._prev_comments 5165 expression = self._parse_factor() 5166 5167 this = self.expression(klass, this=this, comments=comments, expression=expression) 5168 5169 if isinstance(this, exp.Collate): 5170 expr = this.expression 5171 5172 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5173 # fallback to Identifier / Var 5174 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5175 ident = expr.this 5176 if 
isinstance(ident, exp.Identifier): 5177 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5178 5179 return this 5180 5181 def _parse_factor(self) -> t.Optional[exp.Expression]: 5182 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5183 this = parse_method() 5184 5185 while self._match_set(self.FACTOR): 5186 klass = self.FACTOR[self._prev.token_type] 5187 comments = self._prev_comments 5188 expression = parse_method() 5189 5190 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5191 self._retreat(self._index - 1) 5192 return this 5193 5194 this = self.expression(klass, this=this, comments=comments, expression=expression) 5195 5196 if isinstance(this, exp.Div): 5197 this.args["typed"] = self.dialect.TYPED_DIVISION 5198 this.args["safe"] = self.dialect.SAFE_DIVISION 5199 5200 return this 5201 5202 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5203 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5204 5205 def _parse_unary(self) -> t.Optional[exp.Expression]: 5206 if self._match_set(self.UNARY_PARSERS): 5207 return self.UNARY_PARSERS[self._prev.token_type](self) 5208 return self._parse_at_time_zone(self._parse_type()) 5209 5210 def _parse_type( 5211 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5212 ) -> t.Optional[exp.Expression]: 5213 interval = parse_interval and self._parse_interval() 5214 if interval: 5215 return interval 5216 5217 index = self._index 5218 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5219 5220 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5221 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5222 if isinstance(data_type, exp.Cast): 5223 # This constructor can contain ops directly after it, for instance struct unnesting: 5224 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5225 return self._parse_column_ops(data_type) 5226 5227 if data_type: 5228 index2 = self._index 5229 this = self._parse_primary() 5230 5231 if isinstance(this, exp.Literal): 5232 literal = this.name 5233 this = self._parse_column_ops(this) 5234 5235 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5236 if parser: 5237 return parser(self, this, data_type) 5238 5239 if ( 5240 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5241 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5242 and TIME_ZONE_RE.search(literal) 5243 ): 5244 data_type = exp.DataType.build("TIMESTAMPTZ") 5245 5246 return self.expression(exp.Cast, this=this, to=data_type) 5247 5248 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5249 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5250 # 5251 # If the index difference here is greater than 1, that means the parser itself must have 5252 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5253 # 5254 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5255 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5256 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5257 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5258 # 5259 # In these cases, we don't really want to return the converted type, but instead retreat 5260 # and try to parse a Column or Identifier in the section below. 
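# (Added reading note, not part of the upstream source: concretely, in a dialect whose
# TYPE_CONVERTERS expands DECIMAL to DECIMAL(38, 0), a bare DECIMAL token yields
# index2 - index == 1, so the branch below is skipped, the parser retreats, and the token is
# re-parsed as a Column/Identifier further down; an explicit DECIMAL(38, 0) written in the SQL
# also consumes the precision/scale tokens, making index2 - index > 1, so the spelled-out type
# is kept.)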
5261 if data_type.expressions and index2 - index > 1: 5262 self._retreat(index2) 5263 return self._parse_column_ops(data_type) 5264 5265 self._retreat(index) 5266 5267 if fallback_to_identifier: 5268 return self._parse_id_var() 5269 5270 this = self._parse_column() 5271 return this and self._parse_column_ops(this) 5272 5273 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5274 this = self._parse_type() 5275 if not this: 5276 return None 5277 5278 if isinstance(this, exp.Column) and not this.table: 5279 this = exp.var(this.name.upper()) 5280 5281 return self.expression( 5282 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5283 ) 5284 5285 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5286 type_name = identifier.name 5287 5288 while self._match(TokenType.DOT): 5289 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5290 5291 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5292 5293 def _parse_types( 5294 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5295 ) -> t.Optional[exp.Expression]: 5296 index = self._index 5297 5298 this: t.Optional[exp.Expression] = None 5299 prefix = self._match_text_seq("SYSUDTLIB", ".") 5300 5301 if not self._match_set(self.TYPE_TOKENS): 5302 identifier = allow_identifiers and self._parse_id_var( 5303 any_token=False, tokens=(TokenType.VAR,) 5304 ) 5305 if isinstance(identifier, exp.Identifier): 5306 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5307 5308 if len(tokens) != 1: 5309 self.raise_error("Unexpected identifier", self._prev) 5310 5311 if tokens[0].token_type in self.TYPE_TOKENS: 5312 self._prev = tokens[0] 5313 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5314 this = self._parse_user_defined_type(identifier) 5315 else: 5316 self._retreat(self._index - 1) 5317 return None 5318 else: 5319 return None 5320 5321 type_token = self._prev.token_type 5322 5323 if type_token == TokenType.PSEUDO_TYPE: 5324 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5325 5326 if type_token == TokenType.OBJECT_IDENTIFIER: 5327 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5328 5329 # https://materialize.com/docs/sql/types/map/ 5330 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5331 key_type = self._parse_types( 5332 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5333 ) 5334 if not self._match(TokenType.FARROW): 5335 self._retreat(index) 5336 return None 5337 5338 value_type = self._parse_types( 5339 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5340 ) 5341 if not self._match(TokenType.R_BRACKET): 5342 self._retreat(index) 5343 return None 5344 5345 return exp.DataType( 5346 this=exp.DataType.Type.MAP, 5347 expressions=[key_type, value_type], 5348 nested=True, 5349 prefix=prefix, 5350 ) 5351 5352 nested = type_token in self.NESTED_TYPE_TOKENS 5353 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5354 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5355 expressions = None 5356 maybe_func = False 5357 5358 if self._match(TokenType.L_PAREN): 5359 if is_struct: 5360 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5361 elif nested: 5362 expressions = self._parse_csv( 5363 lambda: self._parse_types( 5364 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5365 ) 5366 ) 5367 if type_token == TokenType.NULLABLE 
and len(expressions) == 1: 5368 this = expressions[0] 5369 this.set("nullable", True) 5370 self._match_r_paren() 5371 return this 5372 elif type_token in self.ENUM_TYPE_TOKENS: 5373 expressions = self._parse_csv(self._parse_equality) 5374 elif is_aggregate: 5375 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5376 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5377 ) 5378 if not func_or_ident: 5379 return None 5380 expressions = [func_or_ident] 5381 if self._match(TokenType.COMMA): 5382 expressions.extend( 5383 self._parse_csv( 5384 lambda: self._parse_types( 5385 check_func=check_func, 5386 schema=schema, 5387 allow_identifiers=allow_identifiers, 5388 ) 5389 ) 5390 ) 5391 else: 5392 expressions = self._parse_csv(self._parse_type_size) 5393 5394 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5395 if type_token == TokenType.VECTOR and len(expressions) == 2: 5396 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5397 5398 if not expressions or not self._match(TokenType.R_PAREN): 5399 self._retreat(index) 5400 return None 5401 5402 maybe_func = True 5403 5404 values: t.Optional[t.List[exp.Expression]] = None 5405 5406 if nested and self._match(TokenType.LT): 5407 if is_struct: 5408 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5409 else: 5410 expressions = self._parse_csv( 5411 lambda: self._parse_types( 5412 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5413 ) 5414 ) 5415 5416 if not self._match(TokenType.GT): 5417 self.raise_error("Expecting >") 5418 5419 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5420 values = self._parse_csv(self._parse_assignment) 5421 if not values and is_struct: 5422 values = None 5423 self._retreat(self._index - 1) 5424 else: 5425 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5426 5427 if type_token in self.TIMESTAMPS: 5428 if self._match_text_seq("WITH", "TIME", "ZONE"): 5429 maybe_func = False 5430 tz_type = ( 5431 exp.DataType.Type.TIMETZ 5432 if type_token in self.TIMES 5433 else exp.DataType.Type.TIMESTAMPTZ 5434 ) 5435 this = exp.DataType(this=tz_type, expressions=expressions) 5436 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5437 maybe_func = False 5438 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5439 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5440 maybe_func = False 5441 elif type_token == TokenType.INTERVAL: 5442 unit = self._parse_var(upper=True) 5443 if unit: 5444 if self._match_text_seq("TO"): 5445 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5446 5447 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5448 else: 5449 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5450 elif type_token == TokenType.VOID: 5451 this = exp.DataType(this=exp.DataType.Type.NULL) 5452 5453 if maybe_func and check_func: 5454 index2 = self._index 5455 peek = self._parse_string() 5456 5457 if not peek: 5458 self._retreat(index) 5459 return None 5460 5461 self._retreat(index2) 5462 5463 if not this: 5464 if self._match_text_seq("UNSIGNED"): 5465 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5466 if not unsigned_type_token: 5467 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5468 5469 type_token = unsigned_type_token or type_token 5470 5471 this = exp.DataType( 5472 this=exp.DataType.Type[type_token.value], 5473 
expressions=expressions, 5474 nested=nested, 5475 prefix=prefix, 5476 ) 5477 5478 # Empty arrays/structs are allowed 5479 if values is not None: 5480 cls = exp.Struct if is_struct else exp.Array 5481 this = exp.cast(cls(expressions=values), this, copy=False) 5482 5483 elif expressions: 5484 this.set("expressions", expressions) 5485 5486 # https://materialize.com/docs/sql/types/list/#type-name 5487 while self._match(TokenType.LIST): 5488 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5489 5490 index = self._index 5491 5492 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5493 matched_array = self._match(TokenType.ARRAY) 5494 5495 while self._curr: 5496 datatype_token = self._prev.token_type 5497 matched_l_bracket = self._match(TokenType.L_BRACKET) 5498 5499 if (not matched_l_bracket and not matched_array) or ( 5500 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5501 ): 5502 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5503 # not to be confused with the fixed size array parsing 5504 break 5505 5506 matched_array = False 5507 values = self._parse_csv(self._parse_assignment) or None 5508 if ( 5509 values 5510 and not schema 5511 and ( 5512 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5513 ) 5514 ): 5515 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5516 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5517 self._retreat(index) 5518 break 5519 5520 this = exp.DataType( 5521 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5522 ) 5523 self._match(TokenType.R_BRACKET) 5524 5525 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5526 converter = self.TYPE_CONVERTERS.get(this.this) 5527 if converter: 5528 this = converter(t.cast(exp.DataType, this)) 5529 5530 return this 5531 5532 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5533 index = self._index 5534 5535 if ( 5536 self._curr 5537 and self._next 5538 and self._curr.token_type in self.TYPE_TOKENS 5539 and self._next.token_type in self.TYPE_TOKENS 5540 ): 5541 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5542 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5543 this = self._parse_id_var() 5544 else: 5545 this = ( 5546 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5547 or self._parse_id_var() 5548 ) 5549 5550 self._match(TokenType.COLON) 5551 5552 if ( 5553 type_required 5554 and not isinstance(this, exp.DataType) 5555 and not self._match_set(self.TYPE_TOKENS, advance=False) 5556 ): 5557 self._retreat(index) 5558 return self._parse_types() 5559 5560 return self._parse_column_def(this) 5561 5562 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5563 if not self._match_text_seq("AT", "TIME", "ZONE"): 5564 return this 5565 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5566 5567 def _parse_column(self) -> t.Optional[exp.Expression]: 5568 this = self._parse_column_reference() 5569 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5570 5571 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5572 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5573 5574 return column 5575 5576 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5577 this = self._parse_field() 5578 if ( 5579 not this 5580 and self._match(TokenType.VALUES, advance=False) 5581 and self.VALUES_FOLLOWED_BY_PAREN 5582 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5583 ): 5584 this = self._parse_id_var() 5585 5586 if isinstance(this, exp.Identifier): 5587 # We bubble up comments from the Identifier to the Column 5588 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5589 5590 return this 5591 5592 def _parse_colon_as_variant_extract( 5593 self, this: t.Optional[exp.Expression] 5594 ) -> t.Optional[exp.Expression]: 5595 casts = [] 5596 json_path = [] 5597 escape = None 5598 5599 while self._match(TokenType.COLON): 5600 start_index = self._index 5601 5602 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5603 path = self._parse_column_ops( 5604 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5605 ) 5606 5607 # The cast :: operator has a lower precedence than the extraction operator :, so 5608 # we rearrange the AST appropriately to avoid casting the JSON path 5609 while isinstance(path, exp.Cast): 5610 casts.append(path.to) 5611 path = path.this 5612 5613 if casts: 5614 dcolon_offset = next( 5615 i 5616 for i, t in enumerate(self._tokens[start_index:]) 5617 if t.token_type == TokenType.DCOLON 5618 ) 5619 end_token = self._tokens[start_index + dcolon_offset - 1] 5620 else: 5621 end_token = self._prev 5622 5623 if path: 5624 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5625 # it'll roundtrip to a string literal in GET_PATH 5626 if isinstance(path, exp.Identifier) and path.quoted: 5627 escape = True 5628 5629 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5630 5631 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5632 # Databricks transforms it back to the colon/dot notation 5633 if json_path: 5634 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5635 5636 if json_path_expr: 5637 json_path_expr.set("escape", escape) 5638 5639 this = self.expression( 5640 exp.JSONExtract, 5641 this=this, 5642 expression=json_path_expr, 5643 variant_extract=True, 5644 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5645 ) 5646 5647 while casts: 5648 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5649 5650 return this 5651 5652 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5653 return self._parse_types() 5654 5655 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5656 this = self._parse_bracket(this) 5657 5658 while self._match_set(self.COLUMN_OPERATORS): 5659 op_token = self._prev.token_type 5660 op = self.COLUMN_OPERATORS.get(op_token) 5661 5662 if op_token in self.CAST_COLUMN_OPERATORS: 5663 field = self._parse_dcolon() 5664 if not field: 5665 self.raise_error("Expected type") 5666 elif op and self._curr: 5667 field = self._parse_column_reference() or self._parse_bracket() 5668 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5669 field = self._parse_column_ops(field) 5670 else: 5671 field = self._parse_field(any_token=True, anonymous_func=True) 5672 5673 # Function calls can be qualified, e.g., x.y.FOO() 5674 # This converts the final AST to a series of Dots leading to the function call 5675 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5676 if isinstance(field, (exp.Func, exp.Window)) and this: 5677 this = this.transform( 5678 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5679 ) 5680 5681 if op: 5682 this = op(self, this, field) 5683 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5684 this = self.expression( 5685 exp.Column, 5686 comments=this.comments, 5687 this=field, 5688 table=this.this, 5689 db=this.args.get("table"), 5690 catalog=this.args.get("db"), 5691 ) 5692 elif isinstance(field, exp.Window): 5693 # Move the exp.Dot's to the window's function 5694 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5695 field.set("this", window_func) 5696 this = field 5697 else: 5698 this = self.expression(exp.Dot, this=this, expression=field) 5699 5700 if field and field.comments: 5701 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5702 5703 this = self._parse_bracket(this) 5704 5705 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5706 5707 def _parse_paren(self) -> t.Optional[exp.Expression]: 5708 if not self._match(TokenType.L_PAREN): 5709 return None 5710 5711 comments = self._prev_comments 5712 query = self._parse_select() 5713 5714 if query: 5715 expressions = [query] 5716 else: 5717 expressions = self._parse_expressions() 5718 5719 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5720 5721 if not this and self._match(TokenType.R_PAREN, advance=False): 5722 this = self.expression(exp.Tuple) 5723 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5724 this = self._parse_subquery(this=this, parse_alias=False) 5725 elif isinstance(this, exp.Subquery): 5726 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5727 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5728 this = self.expression(exp.Tuple, expressions=expressions) 5729 else: 5730 this = self.expression(exp.Paren, this=this) 5731 5732 if this: 5733 this.add_comments(comments) 5734 5735 self._match_r_paren(expression=this) 5736 return this 5737 5738 def _parse_primary(self) -> t.Optional[exp.Expression]: 5739 if self._match_set(self.PRIMARY_PARSERS): 5740 token_type = self._prev.token_type 5741 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5742 5743 if token_type == TokenType.STRING: 5744 expressions = [primary] 5745 while self._match(TokenType.STRING): 5746 expressions.append(exp.Literal.string(self._prev.text)) 5747 5748 if len(expressions) > 1: 5749 return self.expression(exp.Concat, expressions=expressions) 5750 5751 return primary 5752 5753 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5754 return exp.Literal.number(f"0.{self._prev.text}") 5755 5756 return self._parse_paren() 5757 5758 def _parse_field( 5759 self, 5760 any_token: bool = False, 5761 tokens: t.Optional[t.Collection[TokenType]] = None, 5762 anonymous_func: bool = False, 5763 ) -> t.Optional[exp.Expression]: 5764 if anonymous_func: 5765 field = ( 5766 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5767 or self._parse_primary() 5768 ) 5769 else: 5770 field = self._parse_primary() or self._parse_function( 5771 anonymous=anonymous_func, any_token=any_token 5772 ) 5773 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5774 5775 def _parse_function( 5776 self, 5777 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5778 anonymous: bool = False, 5779 optional_parens: bool = True, 5780 any_token: bool = False, 5781 ) -> t.Optional[exp.Expression]: 5782 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5783 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5784 fn_syntax = False 5785 if ( 5786 self._match(TokenType.L_BRACE, advance=False) 5787 and self._next 5788 and self._next.text.upper() == "FN" 5789 ): 5790 self._advance(2) 5791 fn_syntax = True 5792 5793 func = self._parse_function_call( 5794 functions=functions, 5795 anonymous=anonymous, 5796 optional_parens=optional_parens, 5797 any_token=any_token, 5798 ) 5799 5800 if fn_syntax: 5801 self._match(TokenType.R_BRACE) 5802 5803 return func 5804 5805 def _parse_function_call( 5806 self, 5807 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5808 anonymous: bool = False, 5809 optional_parens: bool = True, 5810 any_token: bool = False, 5811 ) -> t.Optional[exp.Expression]: 5812 if not self._curr: 5813 return None 5814 5815 comments = self._curr.comments 5816 prev = self._prev 5817 token = self._curr 5818 token_type = self._curr.token_type 5819 this = self._curr.text 5820 upper = this.upper() 5821 5822 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5823 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5824 self._advance() 5825 return self._parse_window(parser(self)) 5826 5827 if not self._next or self._next.token_type != TokenType.L_PAREN: 5828 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5829 self._advance() 5830 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5831 5832 return None 5833 5834 if 
any_token: 5835 if token_type in self.RESERVED_TOKENS: 5836 return None 5837 elif token_type not in self.FUNC_TOKENS: 5838 return None 5839 5840 self._advance(2) 5841 5842 parser = self.FUNCTION_PARSERS.get(upper) 5843 if parser and not anonymous: 5844 this = parser(self) 5845 else: 5846 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5847 5848 if subquery_predicate: 5849 expr = None 5850 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5851 expr = self._parse_select() 5852 self._match_r_paren() 5853 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5854 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5855 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5856 self._advance(-1) 5857 expr = self._parse_bitwise() 5858 5859 if expr: 5860 return self.expression(subquery_predicate, comments=comments, this=expr) 5861 5862 if functions is None: 5863 functions = self.FUNCTIONS 5864 5865 function = functions.get(upper) 5866 known_function = function and not anonymous 5867 5868 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5869 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5870 5871 post_func_comments = self._curr and self._curr.comments 5872 if known_function and post_func_comments: 5873 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5874 # call we'll construct it as exp.Anonymous, even if it's "known" 5875 if any( 5876 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5877 for comment in post_func_comments 5878 ): 5879 known_function = False 5880 5881 if alias and known_function: 5882 args = self._kv_to_prop_eq(args) 5883 5884 if known_function: 5885 func_builder = t.cast(t.Callable, function) 5886 5887 if "dialect" in func_builder.__code__.co_varnames: 5888 func = func_builder(args, dialect=self.dialect) 5889 else: 5890 func = func_builder(args) 5891 5892 func = self.validate_expression(func, args) 5893 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5894 func.meta["name"] = this 5895 5896 this = func 5897 else: 5898 if token_type == TokenType.IDENTIFIER: 5899 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5900 5901 this = self.expression(exp.Anonymous, this=this, expressions=args) 5902 this = this.update_positions(token) 5903 5904 if isinstance(this, exp.Expression): 5905 this.add_comments(comments) 5906 5907 self._match_r_paren(this) 5908 return self._parse_window(this) 5909 5910 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5911 return expression 5912 5913 def _kv_to_prop_eq( 5914 self, expressions: t.List[exp.Expression], parse_map: bool = False 5915 ) -> t.List[exp.Expression]: 5916 transformed = [] 5917 5918 for index, e in enumerate(expressions): 5919 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5920 if isinstance(e, exp.Alias): 5921 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5922 5923 if not isinstance(e, exp.PropertyEQ): 5924 e = self.expression( 5925 exp.PropertyEQ, 5926 this=e.this if parse_map else exp.to_identifier(e.this.name), 5927 expression=e.expression, 5928 ) 5929 5930 if isinstance(e.this, exp.Column): 5931 e.this.replace(e.this.this) 5932 else: 5933 e = self._to_prop_eq(e, index) 5934 5935 transformed.append(e) 5936 5937 return transformed 5938 5939 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5940 return self._parse_statement() 5941 5942 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5943 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5944 5945 def _parse_user_defined_function( 5946 self, kind: t.Optional[TokenType] = None 5947 ) -> t.Optional[exp.Expression]: 5948 this = self._parse_table_parts(schema=True) 5949 5950 if not self._match(TokenType.L_PAREN): 5951 return this 5952 5953 expressions = self._parse_csv(self._parse_function_parameter) 5954 self._match_r_paren() 5955 return self.expression( 5956 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5957 ) 5958 5959 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5960 literal = self._parse_primary() 5961 if literal: 5962 return self.expression(exp.Introducer, this=token.text, expression=literal) 5963 5964 return self._identifier_expression(token) 5965 5966 def _parse_session_parameter(self) -> exp.SessionParameter: 5967 kind = None 5968 this = self._parse_id_var() or self._parse_primary() 5969 5970 if this and self._match(TokenType.DOT): 5971 kind = this.name 5972 this = self._parse_var() or self._parse_primary() 5973 5974 return self.expression(exp.SessionParameter, this=this, kind=kind) 5975 5976 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5977 return self._parse_id_var() 5978 5979 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5980 index = self._index 5981 5982 if self._match(TokenType.L_PAREN): 5983 expressions = t.cast( 5984 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5985 ) 5986 5987 if not self._match(TokenType.R_PAREN): 5988 self._retreat(index) 5989 else: 5990 expressions = [self._parse_lambda_arg()] 5991 5992 if self._match_set(self.LAMBDAS): 5993 return self.LAMBDAS[self._prev.token_type](self, expressions) 5994 5995 self._retreat(index) 5996 5997 this: t.Optional[exp.Expression] 5998 5999 if self._match(TokenType.DISTINCT): 6000 this = self.expression( 6001 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6002 ) 6003 else: 6004 this = self._parse_select_or_expression(alias=alias) 6005 6006 return self._parse_limit( 6007 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6008 ) 6009 6010 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6011 index = self._index 6012 if not self._match(TokenType.L_PAREN): 6013 return this 6014 6015 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6016 # expr can be of both types 6017 if self._match_set(self.SELECT_START_TOKENS): 6018 self._retreat(index) 6019 return this 6020 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6021 self._match_r_paren() 6022 return self.expression(exp.Schema, this=this, expressions=args) 6023 6024 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6025 return self._parse_column_def(self._parse_field(any_token=True)) 6026 6027 def _parse_column_def( 6028 self, this: t.Optional[exp.Expression], computed_column: bool = True 6029 ) -> t.Optional[exp.Expression]: 6030 # column defs are not really columns, they're identifiers 6031 if isinstance(this, exp.Column): 6032 this = this.this 6033 6034 if not computed_column: 6035 self._match(TokenType.ALIAS) 6036 6037 kind = self._parse_types(schema=True) 6038 6039 if self._match_text_seq("FOR", "ORDINALITY"): 6040 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6041 6042 constraints: t.List[exp.Expression] = [] 6043 6044 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6045 ("ALIAS", "MATERIALIZED") 6046 ): 6047 persisted = self._prev.text.upper() == "MATERIALIZED" 6048 constraint_kind = exp.ComputedColumnConstraint( 6049 this=self._parse_assignment(), 6050 persisted=persisted or self._match_text_seq("PERSISTED"), 6051 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6052 ) 6053 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6054 elif ( 6055 kind 6056 and self._match(TokenType.ALIAS, advance=False) 6057 and ( 6058 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6059 or (self._next and self._next.token_type == TokenType.L_PAREN) 6060 ) 6061 ): 6062 self._advance() 6063 constraints.append( 6064 self.expression( 6065 exp.ColumnConstraint, 6066 kind=exp.ComputedColumnConstraint( 6067 this=self._parse_disjunction(), 6068 persisted=self._match_texts(("STORED", "VIRTUAL")) 6069 and self._prev.text.upper() == "STORED", 6070 ), 6071 ) 6072 ) 6073 6074 while True: 6075 constraint = self._parse_column_constraint() 6076 if not constraint: 6077 break 6078 constraints.append(constraint) 6079 6080 if not kind and not constraints: 6081 return this 6082 6083 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6084 6085 def _parse_auto_increment( 6086 self, 6087 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6088 start = None 6089 increment = None 6090 order = None 6091 6092 if self._match(TokenType.L_PAREN, advance=False): 6093 args = self._parse_wrapped_csv(self._parse_bitwise) 6094 start = seq_get(args, 0) 6095 increment = seq_get(args, 1) 6096 elif self._match_text_seq("START"): 6097 start = self._parse_bitwise() 6098 self._match_text_seq("INCREMENT") 6099 increment = self._parse_bitwise() 6100 if self._match_text_seq("ORDER"): 6101 order = True 6102 elif self._match_text_seq("NOORDER"): 6103 order = False 6104 6105 if start and increment: 6106 return exp.GeneratedAsIdentityColumnConstraint( 6107 start=start, increment=increment, this=False, order=order 6108 ) 6109 6110 return exp.AutoIncrementColumnConstraint() 6111 6112 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6113 if not self._match_text_seq("REFRESH"): 6114 self._retreat(self._index - 1) 6115 return None 6116 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6117 6118 def _parse_compress(self) -> exp.CompressColumnConstraint: 6119 if 
self._match(TokenType.L_PAREN, advance=False): 6120 return self.expression( 6121 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6122 ) 6123 6124 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6125 6126 def _parse_generated_as_identity( 6127 self, 6128 ) -> ( 6129 exp.GeneratedAsIdentityColumnConstraint 6130 | exp.ComputedColumnConstraint 6131 | exp.GeneratedAsRowColumnConstraint 6132 ): 6133 if self._match_text_seq("BY", "DEFAULT"): 6134 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6135 this = self.expression( 6136 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6137 ) 6138 else: 6139 self._match_text_seq("ALWAYS") 6140 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6141 6142 self._match(TokenType.ALIAS) 6143 6144 if self._match_text_seq("ROW"): 6145 start = self._match_text_seq("START") 6146 if not start: 6147 self._match(TokenType.END) 6148 hidden = self._match_text_seq("HIDDEN") 6149 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6150 6151 identity = self._match_text_seq("IDENTITY") 6152 6153 if self._match(TokenType.L_PAREN): 6154 if self._match(TokenType.START_WITH): 6155 this.set("start", self._parse_bitwise()) 6156 if self._match_text_seq("INCREMENT", "BY"): 6157 this.set("increment", self._parse_bitwise()) 6158 if self._match_text_seq("MINVALUE"): 6159 this.set("minvalue", self._parse_bitwise()) 6160 if self._match_text_seq("MAXVALUE"): 6161 this.set("maxvalue", self._parse_bitwise()) 6162 6163 if self._match_text_seq("CYCLE"): 6164 this.set("cycle", True) 6165 elif self._match_text_seq("NO", "CYCLE"): 6166 this.set("cycle", False) 6167 6168 if not identity: 6169 this.set("expression", self._parse_range()) 6170 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6171 args = self._parse_csv(self._parse_bitwise) 6172 this.set("start", seq_get(args, 0)) 6173 this.set("increment", seq_get(args, 1)) 6174 6175 self._match_r_paren() 6176 6177 return this 6178 6179 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6180 self._match_text_seq("LENGTH") 6181 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6182 6183 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6184 if self._match_text_seq("NULL"): 6185 return self.expression(exp.NotNullColumnConstraint) 6186 if self._match_text_seq("CASESPECIFIC"): 6187 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6188 if self._match_text_seq("FOR", "REPLICATION"): 6189 return self.expression(exp.NotForReplicationColumnConstraint) 6190 6191 # Unconsume the `NOT` token 6192 self._retreat(self._index - 1) 6193 return None 6194 6195 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6196 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6197 6198 procedure_option_follows = ( 6199 self._match(TokenType.WITH, advance=False) 6200 and self._next 6201 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6202 ) 6203 6204 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6205 return self.expression( 6206 exp.ColumnConstraint, 6207 this=this, 6208 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6209 ) 6210 6211 return this 6212 6213 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6214 if not self._match(TokenType.CONSTRAINT): 6215 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6216 6217 return self.expression( 6218 exp.Constraint, 6219 this=self._parse_id_var(), 6220 expressions=self._parse_unnamed_constraints(), 6221 ) 6222 6223 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6224 constraints = [] 6225 while True: 6226 constraint = self._parse_unnamed_constraint() or self._parse_function() 6227 if not constraint: 6228 break 6229 constraints.append(constraint) 6230 6231 return constraints 6232 6233 def _parse_unnamed_constraint( 6234 self, constraints: t.Optional[t.Collection[str]] = None 6235 ) -> t.Optional[exp.Expression]: 6236 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6237 constraints or self.CONSTRAINT_PARSERS 6238 ): 6239 return None 6240 6241 constraint = self._prev.text.upper() 6242 if constraint not in self.CONSTRAINT_PARSERS: 6243 self.raise_error(f"No parser found for schema constraint {constraint}.") 6244 6245 return self.CONSTRAINT_PARSERS[constraint](self) 6246 6247 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6248 return self._parse_id_var(any_token=False) 6249 6250 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6251 self._match_texts(("KEY", "INDEX")) 6252 return self.expression( 6253 exp.UniqueColumnConstraint, 6254 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6255 this=self._parse_schema(self._parse_unique_key()), 6256 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6257 on_conflict=self._parse_on_conflict(), 6258 options=self._parse_key_constraint_options(), 6259 ) 6260 6261 def _parse_key_constraint_options(self) -> t.List[str]: 6262 options = [] 6263 while True: 6264 if not self._curr: 6265 break 6266 6267 if self._match(TokenType.ON): 6268 action = None 6269 on = self._advance_any() and self._prev.text 6270 6271 if self._match_text_seq("NO", "ACTION"): 6272 action = "NO ACTION" 6273 elif self._match_text_seq("CASCADE"): 6274 action = "CASCADE" 6275 elif self._match_text_seq("RESTRICT"): 6276 action = "RESTRICT" 6277 elif self._match_pair(TokenType.SET, TokenType.NULL): 6278 action = "SET NULL" 6279 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6280 action = "SET DEFAULT" 6281 else: 6282 self.raise_error("Invalid key constraint") 6283 6284 options.append(f"ON {on} {action}") 6285 else: 6286 var = self._parse_var_from_options( 6287 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6288 ) 6289 if not var: 6290 break 6291 options.append(var.name) 6292 6293 return options 6294 6295 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6296 if match and not self._match(TokenType.REFERENCES): 6297 return None 6298 6299 expressions = None 6300 this = self._parse_table(schema=True) 6301 options = self._parse_key_constraint_options() 6302 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6303 6304 def _parse_foreign_key(self) -> exp.ForeignKey: 6305 expressions = ( 6306 self._parse_wrapped_id_vars() 6307 if not self._match(TokenType.REFERENCES, advance=False) 6308 else None 6309 ) 6310 reference = self._parse_references() 6311 on_options = {} 6312 6313 while self._match(TokenType.ON): 6314 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6315 self.raise_error("Expected DELETE or UPDATE") 6316 6317 kind = self._prev.text.lower() 6318 6319 if self._match_text_seq("NO", "ACTION"): 6320 action = "NO ACTION" 6321 elif self._match(TokenType.SET): 6322 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6323 action = "SET " + self._prev.text.upper() 6324 else: 6325 self._advance() 6326 action = self._prev.text.upper() 6327 6328 on_options[kind] = action 6329 6330 return self.expression( 6331 exp.ForeignKey, 6332 expressions=expressions, 6333 reference=reference, 6334 options=self._parse_key_constraint_options(), 6335 **on_options, # type: ignore 6336 ) 6337 6338 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6339 return self._parse_ordered() or self._parse_field() 6340 6341 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6342 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6343 self._retreat(self._index - 1) 6344 return None 6345 6346 id_vars = self._parse_wrapped_id_vars() 6347 return self.expression( 6348 exp.PeriodForSystemTimeConstraint, 6349 this=seq_get(id_vars, 0), 6350 expression=seq_get(id_vars, 1), 6351 ) 6352 6353 def _parse_primary_key( 6354 self, wrapped_optional: bool = False, in_props: bool = False 6355 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6356 desc = ( 6357 self._match_set((TokenType.ASC, TokenType.DESC)) 6358 and self._prev.token_type == TokenType.DESC 6359 ) 6360 6361 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6362 return self.expression( 6363 exp.PrimaryKeyColumnConstraint, 6364 desc=desc, 6365 options=self._parse_key_constraint_options(), 6366 ) 6367 6368 expressions = self._parse_wrapped_csv( 6369 self._parse_primary_key_part, optional=wrapped_optional 6370 ) 6371 6372 return self.expression( 6373 exp.PrimaryKey, 6374 expressions=expressions, 6375 include=self._parse_index_params(), 6376 options=self._parse_key_constraint_options(), 6377 ) 6378 6379 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6380 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6381 6382 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6383 """ 6384 Parses a datetime column in ODBC format. We parse the column into the corresponding 6385 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6386 same as we did for `DATE('yyyy-mm-dd')`. 
6387 6388 Reference: 6389 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6390 """ 6391 self._match(TokenType.VAR) 6392 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6393 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6394 if not self._match(TokenType.R_BRACE): 6395 self.raise_error("Expected }") 6396 return expression 6397 6398 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6399 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6400 return this 6401 6402 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6403 map_token = seq_get(self._tokens, self._index - 2) 6404 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6405 else: 6406 parse_map = False 6407 6408 bracket_kind = self._prev.token_type 6409 if ( 6410 bracket_kind == TokenType.L_BRACE 6411 and self._curr 6412 and self._curr.token_type == TokenType.VAR 6413 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6414 ): 6415 return self._parse_odbc_datetime_literal() 6416 6417 expressions = self._parse_csv( 6418 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6419 ) 6420 6421 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6422 self.raise_error("Expected ]") 6423 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6424 self.raise_error("Expected }") 6425 6426 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6427 if bracket_kind == TokenType.L_BRACE: 6428 this = self.expression( 6429 exp.Struct, 6430 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6431 ) 6432 elif not this: 6433 this = build_array_constructor( 6434 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6435 ) 6436 else: 6437 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6438 if constructor_type: 6439 return build_array_constructor( 6440 constructor_type, 6441 args=expressions, 6442 bracket_kind=bracket_kind, 6443 dialect=self.dialect, 6444 ) 6445 6446 expressions = apply_index_offset( 6447 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6448 ) 6449 this = self.expression( 6450 exp.Bracket, 6451 this=this, 6452 expressions=expressions, 6453 comments=this.pop_comments(), 6454 ) 6455 6456 self._add_comments(this) 6457 return self._parse_bracket(this) 6458 6459 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6460 if self._match(TokenType.COLON): 6461 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6462 return this 6463 6464 def _parse_case(self) -> t.Optional[exp.Expression]: 6465 ifs = [] 6466 default = None 6467 6468 comments = self._prev_comments 6469 expression = self._parse_assignment() 6470 6471 while self._match(TokenType.WHEN): 6472 this = self._parse_assignment() 6473 self._match(TokenType.THEN) 6474 then = self._parse_assignment() 6475 ifs.append(self.expression(exp.If, this=this, true=then)) 6476 6477 if self._match(TokenType.ELSE): 6478 default = self._parse_assignment() 6479 6480 if not self._match(TokenType.END): 6481 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6482 default = exp.column("interval") 6483 else: 6484 self.raise_error("Expected END after CASE", self._prev) 6485 6486 return self.expression( 6487 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6488 ) 6489 6490 def _parse_if(self) -> t.Optional[exp.Expression]: 6491 if self._match(TokenType.L_PAREN): 6492 args = self._parse_csv( 6493 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6494 ) 6495 this = self.validate_expression(exp.If.from_arg_list(args), args) 6496 self._match_r_paren() 6497 else: 6498 index = self._index - 1 6499 6500 if self.NO_PAREN_IF_COMMANDS and index == 0: 6501 return self._parse_as_command(self._prev) 6502 6503 condition = self._parse_assignment() 6504 6505 if not condition: 6506 self._retreat(index) 6507 return None 6508 6509 self._match(TokenType.THEN) 6510 true = self._parse_assignment() 6511 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6512 self._match(TokenType.END) 6513 this = self.expression(exp.If, this=condition, true=true, false=false) 6514 6515 return this 6516 6517 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6518 if not self._match_text_seq("VALUE", "FOR"): 6519 self._retreat(self._index - 1) 6520 return None 6521 6522 return self.expression( 6523 exp.NextValueFor, 6524 this=self._parse_column(), 6525 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6526 ) 6527 6528 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6529 this = self._parse_function() or self._parse_var_or_string(upper=True) 6530 6531 if self._match(TokenType.FROM): 6532 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6533 6534 if not self._match(TokenType.COMMA): 6535 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6536 6537 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6538 6539 def _parse_gap_fill(self) -> exp.GapFill: 6540 self._match(TokenType.TABLE) 6541 this = self._parse_table() 6542 6543 self._match(TokenType.COMMA) 6544 args = [this, *self._parse_csv(self._parse_lambda)] 6545 6546 gap_fill = exp.GapFill.from_arg_list(args) 6547 return self.validate_expression(gap_fill, args) 6548 6549 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6550 this = self._parse_assignment() 6551 6552 if not self._match(TokenType.ALIAS): 6553 if self._match(TokenType.COMMA): 6554 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6555 6556 self.raise_error("Expected AS after CAST") 6557 6558 fmt = None 6559 to = self._parse_types() 6560 6561 default = self._match(TokenType.DEFAULT) 6562 if default: 6563 default = self._parse_bitwise() 6564 self._match_text_seq("ON", "CONVERSION", "ERROR") 6565 6566 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6567 fmt_string = self._parse_string() 6568 fmt = self._parse_at_time_zone(fmt_string) 6569 6570 if not to: 6571 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6572 if to.this in exp.DataType.TEMPORAL_TYPES: 6573 this = self.expression( 6574 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6575 this=this, 6576 format=exp.Literal.string( 6577 format_time( 6578 fmt_string.this if fmt_string else "", 6579 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6580 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6581 ) 6582 ), 6583 safe=safe, 6584 ) 6585 6586 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6587 this.set("zone", fmt.args["zone"]) 6588 return this 6589 elif not to: 6590 self.raise_error("Expected TYPE after CAST") 6591 elif isinstance(to, exp.Identifier): 6592 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6593 elif to.this == exp.DataType.Type.CHAR: 6594 if self._match(TokenType.CHARACTER_SET): 6595 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6596 6597 return self.build_cast( 6598 strict=strict, 6599 this=this, 6600 to=to, 6601 format=fmt, 6602 safe=safe, 6603 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6604 default=default, 6605 ) 6606 6607 def _parse_string_agg(self) -> exp.GroupConcat: 6608 if self._match(TokenType.DISTINCT): 6609 args: t.List[t.Optional[exp.Expression]] = [ 6610 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6611 ] 6612 if self._match(TokenType.COMMA): 6613 args.extend(self._parse_csv(self._parse_assignment)) 6614 else: 6615 args = self._parse_csv(self._parse_assignment) # type: ignore 6616 6617 if self._match_text_seq("ON", "OVERFLOW"): 6618 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6619 if self._match_text_seq("ERROR"): 6620 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6621 else: 6622 self._match_text_seq("TRUNCATE") 6623 on_overflow = self.expression( 6624 exp.OverflowTruncateBehavior, 6625 this=self._parse_string(), 6626 with_count=( 6627 self._match_text_seq("WITH", "COUNT") 6628 or not self._match_text_seq("WITHOUT", "COUNT") 6629 ), 6630 ) 6631 else: 6632 on_overflow = None 6633 6634 index = self._index 6635 if not self._match(TokenType.R_PAREN) and args: 6636 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6637 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6638 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6639 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6640 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6641 6642 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6643 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6644 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
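# (Added reading note, not part of the upstream source: both the Postgres-style
# STRING_AGG(x, ',' ORDER BY x) branch just above and the Trino/Oracle-style
# LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) branch below produce an exp.GroupConcat whose
# `this` carries the ORDER BY, which is what makes rendering it as a MySQL / SQLite
# GROUP_CONCAT straightforward. Assuming the dialect routes LISTAGG through this parser,
# something like sqlglot.parse_one("SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t",
# read="trino") exercises the WITHIN GROUP path.)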
6645 if not self._match_text_seq("WITHIN", "GROUP"): 6646 self._retreat(index) 6647 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6648 6649 # The corresponding match_r_paren will be called in parse_function (caller) 6650 self._match_l_paren() 6651 6652 return self.expression( 6653 exp.GroupConcat, 6654 this=self._parse_order(this=seq_get(args, 0)), 6655 separator=seq_get(args, 1), 6656 on_overflow=on_overflow, 6657 ) 6658 6659 def _parse_convert( 6660 self, strict: bool, safe: t.Optional[bool] = None 6661 ) -> t.Optional[exp.Expression]: 6662 this = self._parse_bitwise() 6663 6664 if self._match(TokenType.USING): 6665 to: t.Optional[exp.Expression] = self.expression( 6666 exp.CharacterSet, this=self._parse_var() 6667 ) 6668 elif self._match(TokenType.COMMA): 6669 to = self._parse_types() 6670 else: 6671 to = None 6672 6673 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6674 6675 def _parse_xml_table(self) -> exp.XMLTable: 6676 namespaces = None 6677 passing = None 6678 columns = None 6679 6680 if self._match_text_seq("XMLNAMESPACES", "("): 6681 namespaces = self._parse_xml_namespace() 6682 self._match_text_seq(")", ",") 6683 6684 this = self._parse_string() 6685 6686 if self._match_text_seq("PASSING"): 6687 # The BY VALUE keywords are optional and are provided for semantic clarity 6688 self._match_text_seq("BY", "VALUE") 6689 passing = self._parse_csv(self._parse_column) 6690 6691 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6692 6693 if self._match_text_seq("COLUMNS"): 6694 columns = self._parse_csv(self._parse_field_def) 6695 6696 return self.expression( 6697 exp.XMLTable, 6698 this=this, 6699 namespaces=namespaces, 6700 passing=passing, 6701 columns=columns, 6702 by_ref=by_ref, 6703 ) 6704 6705 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6706 namespaces = [] 6707 6708 while True: 6709 if self._match(TokenType.DEFAULT): 6710 uri = self._parse_string() 6711 else: 6712 uri = self._parse_alias(self._parse_string()) 6713 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6714 if not self._match(TokenType.COMMA): 6715 break 6716 6717 return namespaces 6718 6719 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6720 args = self._parse_csv(self._parse_assignment) 6721 6722 if len(args) < 3: 6723 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6724 6725 return self.expression(exp.DecodeCase, expressions=args) 6726 6727 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6728 self._match_text_seq("KEY") 6729 key = self._parse_column() 6730 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6731 self._match_text_seq("VALUE") 6732 value = self._parse_bitwise() 6733 6734 if not key and not value: 6735 return None 6736 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6737 6738 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6739 if not this or not self._match_text_seq("FORMAT", "JSON"): 6740 return this 6741 6742 return self.expression(exp.FormatJson, this=this) 6743 6744 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6745 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6746 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6747 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6748 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6749 else: 6750 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6751 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6752 6753 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6754 6755 if not empty and not error and not null: 6756 return None 6757 6758 return self.expression( 6759 exp.OnCondition, 6760 empty=empty, 6761 error=error, 6762 null=null, 6763 ) 6764 6765 def _parse_on_handling( 6766 self, on: str, *values: str 6767 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6768 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6769 for value in values: 6770 if self._match_text_seq(value, "ON", on): 6771 return f"{value} ON {on}" 6772 6773 index = self._index 6774 if self._match(TokenType.DEFAULT): 6775 default_value = self._parse_bitwise() 6776 if self._match_text_seq("ON", on): 6777 return default_value 6778 6779 self._retreat(index) 6780 6781 return None 6782 6783 @t.overload 6784 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6785 6786 @t.overload 6787 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6788 6789 def _parse_json_object(self, agg=False): 6790 star = self._parse_star() 6791 expressions = ( 6792 [star] 6793 if star 6794 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6795 ) 6796 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6797 6798 unique_keys = None 6799 if self._match_text_seq("WITH", "UNIQUE"): 6800 unique_keys = True 6801 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6802 unique_keys = False 6803 6804 self._match_text_seq("KEYS") 6805 6806 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6807 self._parse_type() 6808 ) 6809 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6810 6811 return self.expression( 6812 exp.JSONObjectAgg if agg else exp.JSONObject, 6813 expressions=expressions, 6814 null_handling=null_handling, 6815 unique_keys=unique_keys, 6816 return_type=return_type, 6817 encoding=encoding, 6818 ) 6819 6820 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6821 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6822 if not self._match_text_seq("NESTED"): 6823 this = self._parse_id_var() 6824 kind = self._parse_types(allow_identifiers=False) 6825 nested = None 6826 else: 6827 this = None 6828 kind = None 6829 nested = True 6830 6831 path = self._match_text_seq("PATH") and self._parse_string() 6832 nested_schema = nested and self._parse_json_schema() 6833 6834 return self.expression( 6835 exp.JSONColumnDef, 6836 this=this, 6837 kind=kind, 6838 path=path, 6839 nested_schema=nested_schema, 6840 ) 6841 6842 def _parse_json_schema(self) -> exp.JSONSchema: 6843 self._match_text_seq("COLUMNS") 6844 return self.expression( 6845 exp.JSONSchema, 6846 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6847 ) 6848 6849 def _parse_json_table(self) -> exp.JSONTable: 6850 this = self._parse_format_json(self._parse_bitwise()) 6851 path = self._match(TokenType.COMMA) and self._parse_string() 6852 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6853 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6854 schema = 
self._parse_json_schema() 6855 6856 return exp.JSONTable( 6857 this=this, 6858 schema=schema, 6859 path=path, 6860 error_handling=error_handling, 6861 empty_handling=empty_handling, 6862 ) 6863 6864 def _parse_match_against(self) -> exp.MatchAgainst: 6865 expressions = self._parse_csv(self._parse_column) 6866 6867 self._match_text_seq(")", "AGAINST", "(") 6868 6869 this = self._parse_string() 6870 6871 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6872 modifier = "IN NATURAL LANGUAGE MODE" 6873 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6874 modifier = f"{modifier} WITH QUERY EXPANSION" 6875 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6876 modifier = "IN BOOLEAN MODE" 6877 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6878 modifier = "WITH QUERY EXPANSION" 6879 else: 6880 modifier = None 6881 6882 return self.expression( 6883 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6884 ) 6885 6886 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6887 def _parse_open_json(self) -> exp.OpenJSON: 6888 this = self._parse_bitwise() 6889 path = self._match(TokenType.COMMA) and self._parse_string() 6890 6891 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6892 this = self._parse_field(any_token=True) 6893 kind = self._parse_types() 6894 path = self._parse_string() 6895 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6896 6897 return self.expression( 6898 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6899 ) 6900 6901 expressions = None 6902 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6903 self._match_l_paren() 6904 expressions = self._parse_csv(_parse_open_json_column_def) 6905 6906 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6907 6908 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6909 args = self._parse_csv(self._parse_bitwise) 6910 6911 if self._match(TokenType.IN): 6912 return self.expression( 6913 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6914 ) 6915 6916 if haystack_first: 6917 haystack = seq_get(args, 0) 6918 needle = seq_get(args, 1) 6919 else: 6920 haystack = seq_get(args, 1) 6921 needle = seq_get(args, 0) 6922 6923 return self.expression( 6924 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6925 ) 6926 6927 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6928 args = self._parse_csv(self._parse_table) 6929 return exp.JoinHint(this=func_name.upper(), expressions=args) 6930 6931 def _parse_substring(self) -> exp.Substring: 6932 # Postgres supports the form: substring(string [from int] [for int]) 6933 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6934 6935 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6936 6937 if self._match(TokenType.FROM): 6938 args.append(self._parse_bitwise()) 6939 if self._match(TokenType.FOR): 6940 if len(args) == 1: 6941 args.append(exp.Literal.number(1)) 6942 args.append(self._parse_bitwise()) 6943 6944 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6945 6946 def _parse_trim(self) -> exp.Trim: 6947 # https://www.w3resource.com/sql/character-functions/trim.php 6948 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6949 6950 position = None 6951 collation = None 6952 expression = None 6953 6954 if self._match_texts(self.TRIM_TYPES): 6955 position = 
self._prev.text.upper() 6956 6957 this = self._parse_bitwise() 6958 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6959 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6960 expression = self._parse_bitwise() 6961 6962 if invert_order: 6963 this, expression = expression, this 6964 6965 if self._match(TokenType.COLLATE): 6966 collation = self._parse_bitwise() 6967 6968 return self.expression( 6969 exp.Trim, this=this, position=position, expression=expression, collation=collation 6970 ) 6971 6972 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6973 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6974 6975 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6976 return self._parse_window(self._parse_id_var(), alias=True) 6977 6978 def _parse_respect_or_ignore_nulls( 6979 self, this: t.Optional[exp.Expression] 6980 ) -> t.Optional[exp.Expression]: 6981 if self._match_text_seq("IGNORE", "NULLS"): 6982 return self.expression(exp.IgnoreNulls, this=this) 6983 if self._match_text_seq("RESPECT", "NULLS"): 6984 return self.expression(exp.RespectNulls, this=this) 6985 return this 6986 6987 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6988 if self._match(TokenType.HAVING): 6989 self._match_texts(("MAX", "MIN")) 6990 max = self._prev.text.upper() != "MIN" 6991 return self.expression( 6992 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6993 ) 6994 6995 return this 6996 6997 def _parse_window( 6998 self, this: t.Optional[exp.Expression], alias: bool = False 6999 ) -> t.Optional[exp.Expression]: 7000 func = this 7001 comments = func.comments if isinstance(func, exp.Expression) else None 7002 7003 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7004 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7005 if self._match_text_seq("WITHIN", "GROUP"): 7006 order = self._parse_wrapped(self._parse_order) 7007 this = self.expression(exp.WithinGroup, this=this, expression=order) 7008 7009 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7010 self._match(TokenType.WHERE) 7011 this = self.expression( 7012 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7013 ) 7014 self._match_r_paren() 7015 7016 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7017 # Some dialects choose to implement and some do not. 7018 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7019 7020 # There is some code above in _parse_lambda that handles 7021 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7022 7023 # The below changes handle 7024 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7025 7026 # Oracle allows both formats 7027 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7028 # and Snowflake chose to do the same for familiarity 7029 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7030 if isinstance(this, exp.AggFunc): 7031 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7032 7033 if ignore_respect and ignore_respect is not this: 7034 ignore_respect.replace(ignore_respect.this) 7035 this = self.expression(ignore_respect.__class__, this=this) 7036 7037 this = self._parse_respect_or_ignore_nulls(this) 7038 7039 # bigquery select from window x AS (partition by ...) 
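        # Illustrative sketch (comment only, not parser logic): the two shapes this branch
        # distinguishes are roughly
        #   SELECT ... FROM t WINDOW w AS (PARTITION BY a ORDER BY b)   -- alias=True
        #   SELECT SUM(x) OVER (PARTITION BY a ORDER BY b) FROM t       -- alias=False
        # With alias=True the AS keyword is consumed and no OVER-style keyword is expected,
        # so `over` stays None; otherwise the matched keyword (e.g. OVER) is recorded in `over`.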
7040 if alias: 7041 over = None 7042 self._match(TokenType.ALIAS) 7043 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7044 return this 7045 else: 7046 over = self._prev.text.upper() 7047 7048 if comments and isinstance(func, exp.Expression): 7049 func.pop_comments() 7050 7051 if not self._match(TokenType.L_PAREN): 7052 return self.expression( 7053 exp.Window, 7054 comments=comments, 7055 this=this, 7056 alias=self._parse_id_var(False), 7057 over=over, 7058 ) 7059 7060 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7061 7062 first = self._match(TokenType.FIRST) 7063 if self._match_text_seq("LAST"): 7064 first = False 7065 7066 partition, order = self._parse_partition_and_order() 7067 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7068 7069 if kind: 7070 self._match(TokenType.BETWEEN) 7071 start = self._parse_window_spec() 7072 self._match(TokenType.AND) 7073 end = self._parse_window_spec() 7074 exclude = ( 7075 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7076 if self._match_text_seq("EXCLUDE") 7077 else None 7078 ) 7079 7080 spec = self.expression( 7081 exp.WindowSpec, 7082 kind=kind, 7083 start=start["value"], 7084 start_side=start["side"], 7085 end=end["value"], 7086 end_side=end["side"], 7087 exclude=exclude, 7088 ) 7089 else: 7090 spec = None 7091 7092 self._match_r_paren() 7093 7094 window = self.expression( 7095 exp.Window, 7096 comments=comments, 7097 this=this, 7098 partition_by=partition, 7099 order=order, 7100 spec=spec, 7101 alias=window_alias, 7102 over=over, 7103 first=first, 7104 ) 7105 7106 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7107 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7108 return self._parse_window(window, alias=alias) 7109 7110 return window 7111 7112 def _parse_partition_and_order( 7113 self, 7114 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7115 return self._parse_partition_by(), self._parse_order() 7116 7117 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7118 self._match(TokenType.BETWEEN) 7119 7120 return { 7121 "value": ( 7122 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7123 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7124 or self._parse_bitwise() 7125 ), 7126 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7127 } 7128 7129 def _parse_alias( 7130 self, this: t.Optional[exp.Expression], explicit: bool = False 7131 ) -> t.Optional[exp.Expression]: 7132 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7133 # so this section tries to parse the clause version and if it fails, it treats the token 7134 # as an identifier (alias) 7135 if self._can_parse_limit_or_offset(): 7136 return this 7137 7138 any_token = self._match(TokenType.ALIAS) 7139 comments = self._prev_comments or [] 7140 7141 if explicit and not any_token: 7142 return this 7143 7144 if self._match(TokenType.L_PAREN): 7145 aliases = self.expression( 7146 exp.Aliases, 7147 comments=comments, 7148 this=this, 7149 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7150 ) 7151 self._match_r_paren(aliases) 7152 return aliases 7153 7154 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7155 self.STRING_ALIASES and self._parse_string_as_identifier() 7156 ) 7157 7158 if alias: 7159 comments.extend(alias.pop_comments()) 7160 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7161 column = this.this 7162 7163 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7164 if not this.comments and column and column.comments: 7165 this.comments = column.pop_comments() 7166 7167 return this 7168 7169 def _parse_id_var( 7170 self, 7171 any_token: bool = True, 7172 tokens: t.Optional[t.Collection[TokenType]] = None, 7173 ) -> t.Optional[exp.Expression]: 7174 expression = self._parse_identifier() 7175 if not expression and ( 7176 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7177 ): 7178 quoted = self._prev.token_type == TokenType.STRING 7179 expression = self._identifier_expression(quoted=quoted) 7180 7181 return expression 7182 7183 def _parse_string(self) -> t.Optional[exp.Expression]: 7184 if self._match_set(self.STRING_PARSERS): 7185 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7186 return self._parse_placeholder() 7187 7188 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7189 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7190 if output: 7191 output.update_positions(self._prev) 7192 return output 7193 7194 def _parse_number(self) -> t.Optional[exp.Expression]: 7195 if self._match_set(self.NUMERIC_PARSERS): 7196 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7197 return self._parse_placeholder() 7198 7199 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7200 if self._match(TokenType.IDENTIFIER): 7201 return self._identifier_expression(quoted=True) 7202 return self._parse_placeholder() 7203 7204 def _parse_var( 7205 self, 7206 any_token: bool = False, 7207 tokens: t.Optional[t.Collection[TokenType]] = None, 7208 upper: bool = False, 7209 ) -> t.Optional[exp.Expression]: 7210 if ( 7211 (any_token and self._advance_any()) 7212 or self._match(TokenType.VAR) 7213 or (self._match_set(tokens) if tokens else False) 7214 ): 7215 return self.expression( 7216 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7217 ) 7218 return self._parse_placeholder() 7219 7220 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7221 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7222 self._advance() 7223 return self._prev 7224 return None 7225 7226 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7227 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7228 7229 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7230 return self._parse_primary() or self._parse_var(any_token=True) 7231 7232 def _parse_null(self) -> t.Optional[exp.Expression]: 7233 if self._match_set(self.NULL_TOKENS): 7234 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7235 return self._parse_placeholder() 7236 7237 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7238 if self._match(TokenType.TRUE): 7239 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7240 if self._match(TokenType.FALSE): 7241 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7242 return self._parse_placeholder() 7243 7244 def _parse_star(self) -> t.Optional[exp.Expression]: 7245 if self._match(TokenType.STAR): 7246 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7247 return self._parse_placeholder() 7248 7249 def _parse_parameter(self) -> exp.Parameter: 7250 this = self._parse_identifier() or self._parse_primary_or_var() 7251 return 
self.expression(exp.Parameter, this=this) 7252 7253 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7254 if self._match_set(self.PLACEHOLDER_PARSERS): 7255 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7256 if placeholder: 7257 return placeholder 7258 self._advance(-1) 7259 return None 7260 7261 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7262 if not self._match_texts(keywords): 7263 return None 7264 if self._match(TokenType.L_PAREN, advance=False): 7265 return self._parse_wrapped_csv(self._parse_expression) 7266 7267 expression = self._parse_expression() 7268 return [expression] if expression else None 7269 7270 def _parse_csv( 7271 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7272 ) -> t.List[exp.Expression]: 7273 parse_result = parse_method() 7274 items = [parse_result] if parse_result is not None else [] 7275 7276 while self._match(sep): 7277 self._add_comments(parse_result) 7278 parse_result = parse_method() 7279 if parse_result is not None: 7280 items.append(parse_result) 7281 7282 return items 7283 7284 def _parse_tokens( 7285 self, parse_method: t.Callable, expressions: t.Dict 7286 ) -> t.Optional[exp.Expression]: 7287 this = parse_method() 7288 7289 while self._match_set(expressions): 7290 this = self.expression( 7291 expressions[self._prev.token_type], 7292 this=this, 7293 comments=self._prev_comments, 7294 expression=parse_method(), 7295 ) 7296 7297 return this 7298 7299 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7300 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7301 7302 def _parse_wrapped_csv( 7303 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7304 ) -> t.List[exp.Expression]: 7305 return self._parse_wrapped( 7306 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7307 ) 7308 7309 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7310 wrapped = self._match(TokenType.L_PAREN) 7311 if not wrapped and not optional: 7312 self.raise_error("Expecting (") 7313 parse_result = parse_method() 7314 if wrapped: 7315 self._match_r_paren() 7316 return parse_result 7317 7318 def _parse_expressions(self) -> t.List[exp.Expression]: 7319 return self._parse_csv(self._parse_expression) 7320 7321 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7322 return ( 7323 self._parse_set_operations( 7324 self._parse_alias(self._parse_assignment(), explicit=True) 7325 if alias 7326 else self._parse_assignment() 7327 ) 7328 or self._parse_select() 7329 ) 7330 7331 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7332 return self._parse_query_modifiers( 7333 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7334 ) 7335 7336 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7337 this = None 7338 if self._match_texts(self.TRANSACTION_KIND): 7339 this = self._prev.text 7340 7341 self._match_texts(("TRANSACTION", "WORK")) 7342 7343 modes = [] 7344 while True: 7345 mode = [] 7346 while self._match(TokenType.VAR): 7347 mode.append(self._prev.text) 7348 7349 if mode: 7350 modes.append(" ".join(mode)) 7351 if not self._match(TokenType.COMMA): 7352 break 7353 7354 return self.expression(exp.Transaction, this=this, modes=modes) 7355 7356 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7357 chain = None 7358 savepoint = None 7359 is_rollback = 
self._prev.token_type == TokenType.ROLLBACK 7360 7361 self._match_texts(("TRANSACTION", "WORK")) 7362 7363 if self._match_text_seq("TO"): 7364 self._match_text_seq("SAVEPOINT") 7365 savepoint = self._parse_id_var() 7366 7367 if self._match(TokenType.AND): 7368 chain = not self._match_text_seq("NO") 7369 self._match_text_seq("CHAIN") 7370 7371 if is_rollback: 7372 return self.expression(exp.Rollback, savepoint=savepoint) 7373 7374 return self.expression(exp.Commit, chain=chain) 7375 7376 def _parse_refresh(self) -> exp.Refresh: 7377 self._match(TokenType.TABLE) 7378 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7379 7380 def _parse_column_def_with_exists(self): 7381 start = self._index 7382 self._match(TokenType.COLUMN) 7383 7384 exists_column = self._parse_exists(not_=True) 7385 expression = self._parse_field_def() 7386 7387 if not isinstance(expression, exp.ColumnDef): 7388 self._retreat(start) 7389 return None 7390 7391 expression.set("exists", exists_column) 7392 7393 return expression 7394 7395 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7396 if not self._prev.text.upper() == "ADD": 7397 return None 7398 7399 expression = self._parse_column_def_with_exists() 7400 if not expression: 7401 return None 7402 7403 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7404 if self._match_texts(("FIRST", "AFTER")): 7405 position = self._prev.text 7406 column_position = self.expression( 7407 exp.ColumnPosition, this=self._parse_column(), position=position 7408 ) 7409 expression.set("position", column_position) 7410 7411 return expression 7412 7413 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7414 drop = self._match(TokenType.DROP) and self._parse_drop() 7415 if drop and not isinstance(drop, exp.Command): 7416 drop.set("kind", drop.args.get("kind", "COLUMN")) 7417 return drop 7418 7419 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7420 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7421 return self.expression( 7422 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7423 ) 7424 7425 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7426 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7427 self._match_text_seq("ADD") 7428 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7429 return self.expression( 7430 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7431 ) 7432 7433 column_def = self._parse_add_column() 7434 if isinstance(column_def, exp.ColumnDef): 7435 return column_def 7436 7437 exists = self._parse_exists(not_=True) 7438 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7439 return self.expression( 7440 exp.AddPartition, 7441 exists=exists, 7442 this=self._parse_field(any_token=True), 7443 location=self._match_text_seq("LOCATION", advance=False) 7444 and self._parse_property(), 7445 ) 7446 7447 return None 7448 7449 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7450 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7451 or self._match_text_seq("COLUMNS") 7452 ): 7453 schema = self._parse_schema() 7454 7455 return ( 7456 ensure_list(schema) 7457 if schema 7458 else self._parse_csv(self._parse_column_def_with_exists) 7459 ) 7460 7461 return self._parse_csv(_parse_add_alteration) 7462 7463 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7464 
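        # Rough examples of the ALTER ... ALTER [COLUMN] forms recognized below (exact
        # keywords vary per dialect; mappings are illustrative):
        #   ALTER TABLE t ALTER COLUMN c DROP DEFAULT       -> AlterColumn(drop=True)
        #   ALTER TABLE t ALTER COLUMN c SET DEFAULT 1      -> AlterColumn(default=...)
        #   ALTER TABLE t ALTER COLUMN c SET NOT NULL       -> AlterColumn(allow_null=False)
        #   ALTER TABLE t ALTER COLUMN c SET DATA TYPE INT  -> AlterColumn(dtype=...)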
if self._match_texts(self.ALTER_ALTER_PARSERS): 7465 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7466 7467 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7468 # keyword after ALTER we default to parsing this statement 7469 self._match(TokenType.COLUMN) 7470 column = self._parse_field(any_token=True) 7471 7472 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7473 return self.expression(exp.AlterColumn, this=column, drop=True) 7474 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7475 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7476 if self._match(TokenType.COMMENT): 7477 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7478 if self._match_text_seq("DROP", "NOT", "NULL"): 7479 return self.expression( 7480 exp.AlterColumn, 7481 this=column, 7482 drop=True, 7483 allow_null=True, 7484 ) 7485 if self._match_text_seq("SET", "NOT", "NULL"): 7486 return self.expression( 7487 exp.AlterColumn, 7488 this=column, 7489 allow_null=False, 7490 ) 7491 7492 if self._match_text_seq("SET", "VISIBLE"): 7493 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7494 if self._match_text_seq("SET", "INVISIBLE"): 7495 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7496 7497 self._match_text_seq("SET", "DATA") 7498 self._match_text_seq("TYPE") 7499 return self.expression( 7500 exp.AlterColumn, 7501 this=column, 7502 dtype=self._parse_types(), 7503 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7504 using=self._match(TokenType.USING) and self._parse_assignment(), 7505 ) 7506 7507 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7508 if self._match_texts(("ALL", "EVEN", "AUTO")): 7509 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7510 7511 self._match_text_seq("KEY", "DISTKEY") 7512 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7513 7514 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7515 if compound: 7516 self._match_text_seq("SORTKEY") 7517 7518 if self._match(TokenType.L_PAREN, advance=False): 7519 return self.expression( 7520 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7521 ) 7522 7523 self._match_texts(("AUTO", "NONE")) 7524 return self.expression( 7525 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7526 ) 7527 7528 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7529 index = self._index - 1 7530 7531 partition_exists = self._parse_exists() 7532 if self._match(TokenType.PARTITION, advance=False): 7533 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7534 7535 self._retreat(index) 7536 return self._parse_csv(self._parse_drop_column) 7537 7538 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7539 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7540 exists = self._parse_exists() 7541 old_column = self._parse_column() 7542 to = self._match_text_seq("TO") 7543 new_column = self._parse_column() 7544 7545 if old_column is None or to is None or new_column is None: 7546 return None 7547 7548 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7549 7550 self._match_text_seq("TO") 7551 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7552 7553 def _parse_alter_table_set(self) -> exp.AlterSet: 
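        # Sketch of a few ALTER ... SET variants handled below and the args they populate
        # (illustrative only; the accepted keywords are dialect-specific):
        #   SET ('k1' = 'v1', 'k2' = 'v2')     -> expressions=[...]
        #   SET LOCATION 's3://bucket/path'    -> location=...
        #   SET TABLESPACE ts                  -> tablespace=...
        #   SET TAG k1 = 'v1', k2 = 'v2'       -> tag=[...]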
7554 alter_set = self.expression(exp.AlterSet) 7555 7556 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7557 "TABLE", "PROPERTIES" 7558 ): 7559 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7560 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7561 alter_set.set("expressions", [self._parse_assignment()]) 7562 elif self._match_texts(("LOGGED", "UNLOGGED")): 7563 alter_set.set("option", exp.var(self._prev.text.upper())) 7564 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7565 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7566 elif self._match_text_seq("LOCATION"): 7567 alter_set.set("location", self._parse_field()) 7568 elif self._match_text_seq("ACCESS", "METHOD"): 7569 alter_set.set("access_method", self._parse_field()) 7570 elif self._match_text_seq("TABLESPACE"): 7571 alter_set.set("tablespace", self._parse_field()) 7572 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7573 alter_set.set("file_format", [self._parse_field()]) 7574 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7575 alter_set.set("file_format", self._parse_wrapped_options()) 7576 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7577 alter_set.set("copy_options", self._parse_wrapped_options()) 7578 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7579 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7580 else: 7581 if self._match_text_seq("SERDE"): 7582 alter_set.set("serde", self._parse_field()) 7583 7584 properties = self._parse_wrapped(self._parse_properties, optional=True) 7585 alter_set.set("expressions", [properties]) 7586 7587 return alter_set 7588 7589 def _parse_alter_session(self) -> exp.AlterSession: 7590 """Parse ALTER SESSION SET/UNSET statements.""" 7591 if self._match(TokenType.SET): 7592 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7593 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7594 7595 self._match_text_seq("UNSET") 7596 expressions = self._parse_csv( 7597 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7598 ) 7599 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7600 7601 def _parse_alter(self) -> exp.Alter | exp.Command: 7602 start = self._prev 7603 7604 alter_token = self._match_set(self.ALTERABLES) and self._prev 7605 if not alter_token: 7606 return self._parse_as_command(start) 7607 7608 exists = self._parse_exists() 7609 only = self._match_text_seq("ONLY") 7610 7611 if alter_token.token_type == TokenType.SESSION: 7612 this = None 7613 check = None 7614 cluster = None 7615 else: 7616 this = self._parse_table(schema=True) 7617 check = self._match_text_seq("WITH", "CHECK") 7618 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7619 7620 if self._next: 7621 self._advance() 7622 7623 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7624 if parser: 7625 actions = ensure_list(parser(self)) 7626 not_valid = self._match_text_seq("NOT", "VALID") 7627 options = self._parse_csv(self._parse_property) 7628 7629 if not self._curr and actions: 7630 return self.expression( 7631 exp.Alter, 7632 this=this, 7633 kind=alter_token.text.upper(), 7634 exists=exists, 7635 actions=actions, 7636 only=only, 7637 options=options, 7638 cluster=cluster, 7639 not_valid=not_valid, 7640 check=check, 7641 ) 7642 7643 return self._parse_as_command(start) 7644 7645 def 
_parse_analyze(self) -> exp.Analyze | exp.Command: 7646 start = self._prev 7647 # https://duckdb.org/docs/sql/statements/analyze 7648 if not self._curr: 7649 return self.expression(exp.Analyze) 7650 7651 options = [] 7652 while self._match_texts(self.ANALYZE_STYLES): 7653 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7654 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7655 else: 7656 options.append(self._prev.text.upper()) 7657 7658 this: t.Optional[exp.Expression] = None 7659 inner_expression: t.Optional[exp.Expression] = None 7660 7661 kind = self._curr and self._curr.text.upper() 7662 7663 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7664 this = self._parse_table_parts() 7665 elif self._match_text_seq("TABLES"): 7666 if self._match_set((TokenType.FROM, TokenType.IN)): 7667 kind = f"{kind} {self._prev.text.upper()}" 7668 this = self._parse_table(schema=True, is_db_reference=True) 7669 elif self._match_text_seq("DATABASE"): 7670 this = self._parse_table(schema=True, is_db_reference=True) 7671 elif self._match_text_seq("CLUSTER"): 7672 this = self._parse_table() 7673 # Try matching inner expr keywords before fallback to parse table. 7674 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7675 kind = None 7676 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7677 else: 7678 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7679 kind = None 7680 this = self._parse_table_parts() 7681 7682 partition = self._try_parse(self._parse_partition) 7683 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7684 return self._parse_as_command(start) 7685 7686 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7687 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7688 "WITH", "ASYNC", "MODE" 7689 ): 7690 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7691 else: 7692 mode = None 7693 7694 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7695 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7696 7697 properties = self._parse_properties() 7698 return self.expression( 7699 exp.Analyze, 7700 kind=kind, 7701 this=this, 7702 mode=mode, 7703 partition=partition, 7704 properties=properties, 7705 expression=inner_expression, 7706 options=options, 7707 ) 7708 7709 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7710 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7711 this = None 7712 kind = self._prev.text.upper() 7713 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7714 expressions = [] 7715 7716 if not self._match_text_seq("STATISTICS"): 7717 self.raise_error("Expecting token STATISTICS") 7718 7719 if self._match_text_seq("NOSCAN"): 7720 this = "NOSCAN" 7721 elif self._match(TokenType.FOR): 7722 if self._match_text_seq("ALL", "COLUMNS"): 7723 this = "FOR ALL COLUMNS" 7724 if self._match_texts("COLUMNS"): 7725 this = "FOR COLUMNS" 7726 expressions = self._parse_csv(self._parse_column_reference) 7727 elif self._match_text_seq("SAMPLE"): 7728 sample = self._parse_number() 7729 expressions = [ 7730 self.expression( 7731 exp.AnalyzeSample, 7732 sample=sample, 7733 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7734 ) 7735 ] 7736 7737 return self.expression( 7738 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7739 ) 7740 7741 # 
https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7742 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7743 kind = None 7744 this = None 7745 expression: t.Optional[exp.Expression] = None 7746 if self._match_text_seq("REF", "UPDATE"): 7747 kind = "REF" 7748 this = "UPDATE" 7749 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7750 this = "UPDATE SET DANGLING TO NULL" 7751 elif self._match_text_seq("STRUCTURE"): 7752 kind = "STRUCTURE" 7753 if self._match_text_seq("CASCADE", "FAST"): 7754 this = "CASCADE FAST" 7755 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7756 ("ONLINE", "OFFLINE") 7757 ): 7758 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7759 expression = self._parse_into() 7760 7761 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7762 7763 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7764 this = self._prev.text.upper() 7765 if self._match_text_seq("COLUMNS"): 7766 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7767 return None 7768 7769 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7770 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7771 if self._match_text_seq("STATISTICS"): 7772 return self.expression(exp.AnalyzeDelete, kind=kind) 7773 return None 7774 7775 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7776 if self._match_text_seq("CHAINED", "ROWS"): 7777 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7778 return None 7779 7780 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7781 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7782 this = self._prev.text.upper() 7783 expression: t.Optional[exp.Expression] = None 7784 expressions = [] 7785 update_options = None 7786 7787 if self._match_text_seq("HISTOGRAM", "ON"): 7788 expressions = self._parse_csv(self._parse_column_reference) 7789 with_expressions = [] 7790 while self._match(TokenType.WITH): 7791 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7792 if self._match_texts(("SYNC", "ASYNC")): 7793 if self._match_text_seq("MODE", advance=False): 7794 with_expressions.append(f"{self._prev.text.upper()} MODE") 7795 self._advance() 7796 else: 7797 buckets = self._parse_number() 7798 if self._match_text_seq("BUCKETS"): 7799 with_expressions.append(f"{buckets} BUCKETS") 7800 if with_expressions: 7801 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7802 7803 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7804 TokenType.UPDATE, advance=False 7805 ): 7806 update_options = self._prev.text.upper() 7807 self._advance() 7808 elif self._match_text_seq("USING", "DATA"): 7809 expression = self.expression(exp.UsingData, this=self._parse_string()) 7810 7811 return self.expression( 7812 exp.AnalyzeHistogram, 7813 this=this, 7814 expressions=expressions, 7815 expression=expression, 7816 update_options=update_options, 7817 ) 7818 7819 def _parse_merge(self) -> exp.Merge: 7820 self._match(TokenType.INTO) 7821 target = self._parse_table() 7822 7823 if target and self._match(TokenType.ALIAS, advance=False): 7824 target.set("alias", self._parse_table_alias()) 7825 7826 self._match(TokenType.USING) 7827 using = self._parse_table() 7828 7829 self._match(TokenType.ON) 7830 on = self._parse_assignment() 7831 7832 return self.expression( 7833 exp.Merge, 7834 this=target, 
7835 using=using, 7836 on=on, 7837 whens=self._parse_when_matched(), 7838 returning=self._parse_returning(), 7839 ) 7840 7841 def _parse_when_matched(self) -> exp.Whens: 7842 whens = [] 7843 7844 while self._match(TokenType.WHEN): 7845 matched = not self._match(TokenType.NOT) 7846 self._match_text_seq("MATCHED") 7847 source = ( 7848 False 7849 if self._match_text_seq("BY", "TARGET") 7850 else self._match_text_seq("BY", "SOURCE") 7851 ) 7852 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7853 7854 self._match(TokenType.THEN) 7855 7856 if self._match(TokenType.INSERT): 7857 this = self._parse_star() 7858 if this: 7859 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7860 else: 7861 then = self.expression( 7862 exp.Insert, 7863 this=exp.var("ROW") 7864 if self._match_text_seq("ROW") 7865 else self._parse_value(values=False), 7866 expression=self._match_text_seq("VALUES") and self._parse_value(), 7867 ) 7868 elif self._match(TokenType.UPDATE): 7869 expressions = self._parse_star() 7870 if expressions: 7871 then = self.expression(exp.Update, expressions=expressions) 7872 else: 7873 then = self.expression( 7874 exp.Update, 7875 expressions=self._match(TokenType.SET) 7876 and self._parse_csv(self._parse_equality), 7877 ) 7878 elif self._match(TokenType.DELETE): 7879 then = self.expression(exp.Var, this=self._prev.text) 7880 else: 7881 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7882 7883 whens.append( 7884 self.expression( 7885 exp.When, 7886 matched=matched, 7887 source=source, 7888 condition=condition, 7889 then=then, 7890 ) 7891 ) 7892 return self.expression(exp.Whens, expressions=whens) 7893 7894 def _parse_show(self) -> t.Optional[exp.Expression]: 7895 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7896 if parser: 7897 return parser(self) 7898 return self._parse_as_command(self._prev) 7899 7900 def _parse_set_item_assignment( 7901 self, kind: t.Optional[str] = None 7902 ) -> t.Optional[exp.Expression]: 7903 index = self._index 7904 7905 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7906 return self._parse_set_transaction(global_=kind == "GLOBAL") 7907 7908 left = self._parse_primary() or self._parse_column() 7909 assignment_delimiter = self._match_texts(("=", "TO")) 7910 7911 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7912 self._retreat(index) 7913 return None 7914 7915 right = self._parse_statement() or self._parse_id_var() 7916 if isinstance(right, (exp.Column, exp.Identifier)): 7917 right = exp.var(right.name) 7918 7919 this = self.expression(exp.EQ, this=left, expression=right) 7920 return self.expression(exp.SetItem, this=this, kind=kind) 7921 7922 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7923 self._match_text_seq("TRANSACTION") 7924 characteristics = self._parse_csv( 7925 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7926 ) 7927 return self.expression( 7928 exp.SetItem, 7929 expressions=characteristics, 7930 kind="TRANSACTION", 7931 **{"global": global_}, # type: ignore 7932 ) 7933 7934 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7935 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7936 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7937 7938 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7939 index = self._index 7940 set_ = self.expression( 7941 exp.Set, 
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7942 ) 7943 7944 if self._curr: 7945 self._retreat(index) 7946 return self._parse_as_command(self._prev) 7947 7948 return set_ 7949 7950 def _parse_var_from_options( 7951 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7952 ) -> t.Optional[exp.Var]: 7953 start = self._curr 7954 if not start: 7955 return None 7956 7957 option = start.text.upper() 7958 continuations = options.get(option) 7959 7960 index = self._index 7961 self._advance() 7962 for keywords in continuations or []: 7963 if isinstance(keywords, str): 7964 keywords = (keywords,) 7965 7966 if self._match_text_seq(*keywords): 7967 option = f"{option} {' '.join(keywords)}" 7968 break 7969 else: 7970 if continuations or continuations is None: 7971 if raise_unmatched: 7972 self.raise_error(f"Unknown option {option}") 7973 7974 self._retreat(index) 7975 return None 7976 7977 return exp.var(option) 7978 7979 def _parse_as_command(self, start: Token) -> exp.Command: 7980 while self._curr: 7981 self._advance() 7982 text = self._find_sql(start, self._prev) 7983 size = len(start.text) 7984 self._warn_unsupported() 7985 return exp.Command(this=text[:size], expression=text[size:]) 7986 7987 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7988 settings = [] 7989 7990 self._match_l_paren() 7991 kind = self._parse_id_var() 7992 7993 if self._match(TokenType.L_PAREN): 7994 while True: 7995 key = self._parse_id_var() 7996 value = self._parse_primary() 7997 if not key and value is None: 7998 break 7999 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8000 self._match(TokenType.R_PAREN) 8001 8002 self._match_r_paren() 8003 8004 return self.expression( 8005 exp.DictProperty, 8006 this=this, 8007 kind=kind.this if kind else None, 8008 settings=settings, 8009 ) 8010 8011 def _parse_dict_range(self, this: str) -> exp.DictRange: 8012 self._match_l_paren() 8013 has_min = self._match_text_seq("MIN") 8014 if has_min: 8015 min = self._parse_var() or self._parse_primary() 8016 self._match_text_seq("MAX") 8017 max = self._parse_var() or self._parse_primary() 8018 else: 8019 max = self._parse_var() or self._parse_primary() 8020 min = exp.Literal.number(0) 8021 self._match_r_paren() 8022 return self.expression(exp.DictRange, this=this, min=min, max=max) 8023 8024 def _parse_comprehension( 8025 self, this: t.Optional[exp.Expression] 8026 ) -> t.Optional[exp.Comprehension]: 8027 index = self._index 8028 expression = self._parse_column() 8029 if not self._match(TokenType.IN): 8030 self._retreat(index - 1) 8031 return None 8032 iterator = self._parse_column() 8033 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8034 return self.expression( 8035 exp.Comprehension, 8036 this=this, 8037 expression=expression, 8038 iterator=iterator, 8039 condition=condition, 8040 ) 8041 8042 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8043 if self._match(TokenType.HEREDOC_STRING): 8044 return self.expression(exp.Heredoc, this=self._prev.text) 8045 8046 if not self._match_text_seq("$"): 8047 return None 8048 8049 tags = ["$"] 8050 tag_text = None 8051 8052 if self._is_connected(): 8053 self._advance() 8054 tags.append(self._prev.text.upper()) 8055 else: 8056 self.raise_error("No closing $ found") 8057 8058 if tags[-1] != "$": 8059 if self._is_connected() and self._match_text_seq("$"): 8060 tag_text = tags[-1] 8061 tags.append("$") 8062 else: 8063 self.raise_error("No closing $ found") 8064 8065 heredoc_start = self._curr 8066 
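        # From this point we scan token by token until the opening tag sequence ("$" or
        # "$tag$" style) reappears; the text between the markers becomes the Heredoc body.
        # Illustrative only: $fn$ SELECT 1 $fn$ parses, roughly, to Heredoc(this='SELECT 1',
        # tag='FN'), since the tag text is upper-cased above.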
8067 while self._curr: 8068 if self._match_text_seq(*tags, advance=False): 8069 this = self._find_sql(heredoc_start, self._prev) 8070 self._advance(len(tags)) 8071 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8072 8073 self._advance() 8074 8075 self.raise_error(f"No closing {''.join(tags)} found") 8076 return None 8077 8078 def _find_parser( 8079 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8080 ) -> t.Optional[t.Callable]: 8081 if not self._curr: 8082 return None 8083 8084 index = self._index 8085 this = [] 8086 while True: 8087 # The current token might be multiple words 8088 curr = self._curr.text.upper() 8089 key = curr.split(" ") 8090 this.append(curr) 8091 8092 self._advance() 8093 result, trie = in_trie(trie, key) 8094 if result == TrieResult.FAILED: 8095 break 8096 8097 if result == TrieResult.EXISTS: 8098 subparser = parsers[" ".join(this)] 8099 return subparser 8100 8101 self._retreat(index) 8102 return None 8103 8104 def _match(self, token_type, advance=True, expression=None): 8105 if not self._curr: 8106 return None 8107 8108 if self._curr.token_type == token_type: 8109 if advance: 8110 self._advance() 8111 self._add_comments(expression) 8112 return True 8113 8114 return None 8115 8116 def _match_set(self, types, advance=True): 8117 if not self._curr: 8118 return None 8119 8120 if self._curr.token_type in types: 8121 if advance: 8122 self._advance() 8123 return True 8124 8125 return None 8126 8127 def _match_pair(self, token_type_a, token_type_b, advance=True): 8128 if not self._curr or not self._next: 8129 return None 8130 8131 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8132 if advance: 8133 self._advance(2) 8134 return True 8135 8136 return None 8137 8138 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8139 if not self._match(TokenType.L_PAREN, expression=expression): 8140 self.raise_error("Expecting (") 8141 8142 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8143 if not self._match(TokenType.R_PAREN, expression=expression): 8144 self.raise_error("Expecting )") 8145 8146 def _match_texts(self, texts, advance=True): 8147 if ( 8148 self._curr 8149 and self._curr.token_type != TokenType.STRING 8150 and self._curr.text.upper() in texts 8151 ): 8152 if advance: 8153 self._advance() 8154 return True 8155 return None 8156 8157 def _match_text_seq(self, *texts, advance=True): 8158 index = self._index 8159 for text in texts: 8160 if ( 8161 self._curr 8162 and self._curr.token_type != TokenType.STRING 8163 and self._curr.text.upper() == text 8164 ): 8165 self._advance() 8166 else: 8167 self._retreat(index) 8168 return None 8169 8170 if not advance: 8171 self._retreat(index) 8172 8173 return True 8174 8175 def _replace_lambda( 8176 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8177 ) -> t.Optional[exp.Expression]: 8178 if not node: 8179 return node 8180 8181 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8182 8183 for column in node.find_all(exp.Column): 8184 typ = lambda_types.get(column.parts[0].name) 8185 if typ is not None: 8186 dot_or_id = column.to_dot() if column.table else column.this 8187 8188 if typ: 8189 dot_or_id = self.expression( 8190 exp.Cast, 8191 this=dot_or_id, 8192 to=typ, 8193 ) 8194 8195 parent = column.parent 8196 8197 while isinstance(parent, exp.Dot): 8198 if not isinstance(parent.parent, exp.Dot): 8199 parent.replace(dot_or_id) 8200 break 8201 parent = parent.parent 8202 else: 
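                # while...else: this branch runs only if the loop above never executed (and
                # therefore never hit `break`), i.e. the column is not nested under an
                # exp.Dot chain, so the column itself is replaced directly.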
8203 if column is node: 8204 node = dot_or_id 8205 else: 8206 column.replace(dot_or_id) 8207 return node 8208 8209 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8210 start = self._prev 8211 8212 # Not to be confused with TRUNCATE(number, decimals) function call 8213 if self._match(TokenType.L_PAREN): 8214 self._retreat(self._index - 2) 8215 return self._parse_function() 8216 8217 # Clickhouse supports TRUNCATE DATABASE as well 8218 is_database = self._match(TokenType.DATABASE) 8219 8220 self._match(TokenType.TABLE) 8221 8222 exists = self._parse_exists(not_=False) 8223 8224 expressions = self._parse_csv( 8225 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8226 ) 8227 8228 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8229 8230 if self._match_text_seq("RESTART", "IDENTITY"): 8231 identity = "RESTART" 8232 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8233 identity = "CONTINUE" 8234 else: 8235 identity = None 8236 8237 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8238 option = self._prev.text 8239 else: 8240 option = None 8241 8242 partition = self._parse_partition() 8243 8244 # Fallback case 8245 if self._curr: 8246 return self._parse_as_command(start) 8247 8248 return self.expression( 8249 exp.TruncateTable, 8250 expressions=expressions, 8251 is_database=is_database, 8252 exists=exists, 8253 cluster=cluster, 8254 identity=identity, 8255 option=option, 8256 partition=partition, 8257 ) 8258 8259 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8260 this = self._parse_ordered(self._parse_opclass) 8261 8262 if not self._match(TokenType.WITH): 8263 return this 8264 8265 op = self._parse_var(any_token=True) 8266 8267 return self.expression(exp.WithOperator, this=this, op=op) 8268 8269 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8270 self._match(TokenType.EQ) 8271 self._match(TokenType.L_PAREN) 8272 8273 opts: t.List[t.Optional[exp.Expression]] = [] 8274 option: exp.Expression | None 8275 while self._curr and not self._match(TokenType.R_PAREN): 8276 if self._match_text_seq("FORMAT_NAME", "="): 8277 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8278 option = self._parse_format_name() 8279 else: 8280 option = self._parse_property() 8281 8282 if option is None: 8283 self.raise_error("Unable to parse option") 8284 break 8285 8286 opts.append(option) 8287 8288 return opts 8289 8290 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8291 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8292 8293 options = [] 8294 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8295 option = self._parse_var(any_token=True) 8296 prev = self._prev.text.upper() 8297 8298 # Different dialects might separate options and values by white space, "=" and "AS" 8299 self._match(TokenType.EQ) 8300 self._match(TokenType.ALIAS) 8301 8302 param = self.expression(exp.CopyParameter, this=option) 8303 8304 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8305 TokenType.L_PAREN, advance=False 8306 ): 8307 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8308 param.set("expressions", self._parse_wrapped_options()) 8309 elif prev == "FILE_FORMAT": 8310 # T-SQL's external file format case 8311 param.set("expression", self._parse_field()) 8312 else: 8313 param.set("expression", self._parse_unquoted_field()) 8314 8315 options.append(param) 8316 self._match(sep) 8317 8318 return options 
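    # Illustrative COPY statement (Snowflake-flavored; a rough sketch, not a supported-grammar
    # reference) showing which helpers handle which pieces:
    #   COPY INTO t FROM 's3://bucket/path'                           -- _parse_copy
    #     CREDENTIALS = (AWS_KEY_ID = '...' AWS_SECRET_KEY = '...')   -- _parse_credentials
    #     (FILE_FORMAT = (TYPE = CSV))                                -- _parse_copy_parameters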
8319 8320 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8321 expr = self.expression(exp.Credentials) 8322 8323 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8324 expr.set("storage", self._parse_field()) 8325 if self._match_text_seq("CREDENTIALS"): 8326 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8327 creds = ( 8328 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8329 ) 8330 expr.set("credentials", creds) 8331 if self._match_text_seq("ENCRYPTION"): 8332 expr.set("encryption", self._parse_wrapped_options()) 8333 if self._match_text_seq("IAM_ROLE"): 8334 expr.set("iam_role", self._parse_field()) 8335 if self._match_text_seq("REGION"): 8336 expr.set("region", self._parse_field()) 8337 8338 return expr 8339 8340 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8341 return self._parse_field() 8342 8343 def _parse_copy(self) -> exp.Copy | exp.Command: 8344 start = self._prev 8345 8346 self._match(TokenType.INTO) 8347 8348 this = ( 8349 self._parse_select(nested=True, parse_subquery_alias=False) 8350 if self._match(TokenType.L_PAREN, advance=False) 8351 else self._parse_table(schema=True) 8352 ) 8353 8354 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8355 8356 files = self._parse_csv(self._parse_file_location) 8357 credentials = self._parse_credentials() 8358 8359 self._match_text_seq("WITH") 8360 8361 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8362 8363 # Fallback case 8364 if self._curr: 8365 return self._parse_as_command(start) 8366 8367 return self.expression( 8368 exp.Copy, 8369 this=this, 8370 kind=kind, 8371 credentials=credentials, 8372 files=files, 8373 params=params, 8374 ) 8375 8376 def _parse_normalize(self) -> exp.Normalize: 8377 return self.expression( 8378 exp.Normalize, 8379 this=self._parse_bitwise(), 8380 form=self._match(TokenType.COMMA) and self._parse_var(), 8381 ) 8382 8383 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8384 args = self._parse_csv(lambda: self._parse_lambda()) 8385 8386 this = seq_get(args, 0) 8387 decimals = seq_get(args, 1) 8388 8389 return expr_type( 8390 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8391 ) 8392 8393 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8394 star_token = self._prev 8395 8396 if self._match_text_seq("COLUMNS", "(", advance=False): 8397 this = self._parse_function() 8398 if isinstance(this, exp.Columns): 8399 this.set("unpack", True) 8400 return this 8401 8402 return self.expression( 8403 exp.Star, 8404 **{ # type: ignore 8405 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8406 "replace": self._parse_star_op("REPLACE"), 8407 "rename": self._parse_star_op("RENAME"), 8408 }, 8409 ).update_positions(star_token) 8410 8411 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8412 privilege_parts = [] 8413 8414 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8415 # (end of privilege list) or L_PAREN (start of column list) are met 8416 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8417 privilege_parts.append(self._curr.text.upper()) 8418 self._advance() 8419 8420 this = exp.var(" ".join(privilege_parts)) 8421 expressions = ( 8422 self._parse_wrapped_csv(self._parse_column) 8423 if self._match(TokenType.L_PAREN, advance=False) 8424 else None 8425 ) 8426 8427 return self.expression(exp.GrantPrivilege, this=this, 
expressions=expressions) 8428 8429 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8430 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8431 principal = self._parse_id_var() 8432 8433 if not principal: 8434 return None 8435 8436 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8437 8438 def _parse_grant_revoke_common( 8439 self, 8440 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8441 privileges = self._parse_csv(self._parse_grant_privilege) 8442 8443 self._match(TokenType.ON) 8444 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8445 8446 # Attempt to parse the securable e.g. MySQL allows names 8447 # such as "foo.*", "*.*" which are not easily parseable yet 8448 securable = self._try_parse(self._parse_table_parts) 8449 8450 return privileges, kind, securable 8451 8452 def _parse_grant(self) -> exp.Grant | exp.Command: 8453 start = self._prev 8454 8455 privileges, kind, securable = self._parse_grant_revoke_common() 8456 8457 if not securable or not self._match_text_seq("TO"): 8458 return self._parse_as_command(start) 8459 8460 principals = self._parse_csv(self._parse_grant_principal) 8461 8462 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8463 8464 if self._curr: 8465 return self._parse_as_command(start) 8466 8467 return self.expression( 8468 exp.Grant, 8469 privileges=privileges, 8470 kind=kind, 8471 securable=securable, 8472 principals=principals, 8473 grant_option=grant_option, 8474 ) 8475 8476 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8477 start = self._prev 8478 8479 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8480 8481 privileges, kind, securable = self._parse_grant_revoke_common() 8482 8483 if not securable or not self._match_text_seq("FROM"): 8484 return self._parse_as_command(start) 8485 8486 principals = self._parse_csv(self._parse_grant_principal) 8487 8488 cascade = None 8489 if self._match_texts(("CASCADE", "RESTRICT")): 8490 cascade = self._prev.text.upper() 8491 8492 if self._curr: 8493 return self._parse_as_command(start) 8494 8495 return self.expression( 8496 exp.Revoke, 8497 privileges=privileges, 8498 kind=kind, 8499 securable=securable, 8500 principals=principals, 8501 grant_option=grant_option, 8502 cascade=cascade, 8503 ) 8504 8505 def _parse_overlay(self) -> exp.Overlay: 8506 return self.expression( 8507 exp.Overlay, 8508 **{ # type: ignore 8509 "this": self._parse_bitwise(), 8510 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8511 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8512 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8513 }, 8514 ) 8515 8516 def _parse_format_name(self) -> exp.Property: 8517 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8518 # for FILE_FORMAT = <format_name> 8519 return self.expression( 8520 exp.Property, 8521 this=exp.var("FORMAT_NAME"), 8522 value=self._parse_string() or self._parse_table_parts(), 8523 ) 8524 8525 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8526 args: t.List[exp.Expression] = [] 8527 8528 if self._match(TokenType.DISTINCT): 8529 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8530 self._match(TokenType.COMMA) 8531 8532 args.extend(self._parse_csv(self._parse_assignment)) 8533 8534 return self.expression( 8535 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8536 
) 8537 8538 def _identifier_expression( 8539 self, token: t.Optional[Token] = None, **kwargs: t.Any 8540 ) -> exp.Identifier: 8541 token = token or self._prev 8542 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8543 expression.update_positions(token) 8544 return expression 8545 8546 def _build_pipe_cte( 8547 self, 8548 query: exp.Query, 8549 expressions: t.List[exp.Expression], 8550 alias_cte: t.Optional[exp.TableAlias] = None, 8551 ) -> exp.Select: 8552 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8553 if alias_cte: 8554 new_cte = alias_cte 8555 else: 8556 self._pipe_cte_counter += 1 8557 new_cte = f"__tmp{self._pipe_cte_counter}" 8558 8559 with_ = query.args.get("with") 8560 ctes = with_.pop() if with_ else None 8561 8562 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8563 if ctes: 8564 new_select.set("with", ctes) 8565 8566 return new_select.with_(new_cte, as_=query, copy=False) 8567 8568 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8569 select = self._parse_select(consume_pipe=False) 8570 if not select: 8571 return query 8572 8573 return self._build_pipe_cte( 8574 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8575 ) 8576 8577 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8578 limit = self._parse_limit() 8579 offset = self._parse_offset() 8580 if limit: 8581 curr_limit = query.args.get("limit", limit) 8582 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8583 query.limit(limit, copy=False) 8584 if offset: 8585 curr_offset = query.args.get("offset") 8586 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8587 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8588 8589 return query 8590 8591 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8592 this = self._parse_assignment() 8593 if self._match_text_seq("GROUP", "AND", advance=False): 8594 return this 8595 8596 this = self._parse_alias(this) 8597 8598 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8599 return self._parse_ordered(lambda: this) 8600 8601 return this 8602 8603 def _parse_pipe_syntax_aggregate_group_order_by( 8604 self, query: exp.Select, group_by_exists: bool = True 8605 ) -> exp.Select: 8606 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8607 aggregates_or_groups, orders = [], [] 8608 for element in expr: 8609 if isinstance(element, exp.Ordered): 8610 this = element.this 8611 if isinstance(this, exp.Alias): 8612 element.set("this", this.args["alias"]) 8613 orders.append(element) 8614 else: 8615 this = element 8616 aggregates_or_groups.append(this) 8617 8618 if group_by_exists: 8619 query.select(*aggregates_or_groups, copy=False).group_by( 8620 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8621 copy=False, 8622 ) 8623 else: 8624 query.select(*aggregates_or_groups, append=False, copy=False) 8625 8626 if orders: 8627 return query.order_by(*orders, append=False, copy=False) 8628 8629 return query 8630 8631 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8632 self._match_text_seq("AGGREGATE") 8633 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8634 8635 if self._match(TokenType.GROUP_BY) or ( 8636 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8637 ): 8638 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8639 8640 
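        # Rough illustration (BigQuery-style pipe syntax): an input such as
        #   FROM t |> AGGREGATE COUNT(*) AS c GROUP BY col
        # ends up here with the aggregate list as projections and the GROUP BY items grouped,
        # after which the result is wrapped into a fresh CTE below.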
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8641 8642 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8643 first_setop = self.parse_set_operation(this=query) 8644 if not first_setop: 8645 return None 8646 8647 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8648 expr = self._parse_paren() 8649 return expr.assert_is(exp.Subquery).unnest() if expr else None 8650 8651 first_setop.this.pop() 8652 8653 setops = [ 8654 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8655 *self._parse_csv(_parse_and_unwrap_query), 8656 ] 8657 8658 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8659 with_ = query.args.get("with") 8660 ctes = with_.pop() if with_ else None 8661 8662 if isinstance(first_setop, exp.Union): 8663 query = query.union(*setops, copy=False, **first_setop.args) 8664 elif isinstance(first_setop, exp.Except): 8665 query = query.except_(*setops, copy=False, **first_setop.args) 8666 else: 8667 query = query.intersect(*setops, copy=False, **first_setop.args) 8668 8669 query.set("with", ctes) 8670 8671 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8672 8673 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8674 join = self._parse_join() 8675 if not join: 8676 return None 8677 8678 if isinstance(query, exp.Select): 8679 return query.join(join, copy=False) 8680 8681 return query 8682 8683 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8684 pivots = self._parse_pivots() 8685 if not pivots: 8686 return query 8687 8688 from_ = query.args.get("from") 8689 if from_: 8690 from_.this.set("pivots", pivots) 8691 8692 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8693 8694 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8695 self._match_text_seq("EXTEND") 8696 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8697 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8698 8699 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8700 sample = self._parse_table_sample() 8701 8702 with_ = query.args.get("with") 8703 if with_: 8704 with_.expressions[-1].this.set("sample", sample) 8705 else: 8706 query.set("sample", sample) 8707 8708 return query 8709 8710 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8711 if isinstance(query, exp.Subquery): 8712 query = exp.select("*").from_(query, copy=False) 8713 8714 if not query.args.get("from"): 8715 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8716 8717 while self._match(TokenType.PIPE_GT): 8718 start = self._curr 8719 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8720 if not parser: 8721 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8722 # keywords, making it tricky to disambiguate them without lookahead. The approach 8723 # here is to try and parse a set operation and if that fails, then try to parse a 8724 # join operator. If that fails as well, then the operator is not supported. 
8725 parsed_query = self._parse_pipe_syntax_set_operator(query) 8726 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8727 if not parsed_query: 8728 self._retreat(start) 8729 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8730 break 8731 query = parsed_query 8732 else: 8733 query = parser(self, query) 8734 8735 return query 8736 8737 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8738 vars = self._parse_csv(self._parse_id_var) 8739 if not vars: 8740 return None 8741 8742 return self.expression( 8743 exp.DeclareItem, 8744 this=vars, 8745 kind=self._parse_types(), 8746 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8747 ) 8748 8749 def _parse_declare(self) -> exp.Declare | exp.Command: 8750 start = self._prev 8751 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8752 8753 if not expressions or self._curr: 8754 return self._parse_as_command(start) 8755 8756 return self.expression(exp.Declare, expressions=expressions) 8757 8758 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8759 exp_class = exp.Cast if strict else exp.TryCast 8760 8761 if exp_class == exp.TryCast: 8762 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8763 8764 return self.expression(exp_class, **kwargs) 8765 8766 def _parse_json_value(self) -> exp.JSONValue: 8767 this = self._parse_bitwise() 8768 self._match(TokenType.COMMA) 8769 path = self._parse_bitwise() 8770 8771 returning = self._match(TokenType.RETURNING) and self._parse_type() 8772 8773 return self.expression( 8774 exp.JSONValue, 8775 this=this, 8776 path=self.dialect.to_json_path(path), 8777 returning=returning, 8778 on_condition=self._parse_on_condition(), 8779 ) 8780 8781 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8782 def concat_exprs( 8783 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8784 ) -> exp.Expression: 8785 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8786 concat_exprs = [ 8787 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8788 ] 8789 node.set("expressions", concat_exprs) 8790 return node 8791 if len(exprs) == 1: 8792 return exprs[0] 8793 return self.expression(exp.Concat, expressions=args, safe=True) 8794 8795 args = self._parse_csv(self._parse_lambda) 8796 8797 if args: 8798 order = args[-1] if isinstance(args[-1], exp.Order) else None 8799 8800 if order: 8801 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8802 # remove 'expr' from exp.Order and add it back to args 8803 args[-1] = order.this 8804 order.set("this", concat_exprs(order.this, args)) 8805 8806 this = order or concat_exprs(args[0], args) 8807 else: 8808 this = None 8809 8810 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8811 8812 return self.expression(exp.GroupConcat, this=this, separator=separator)
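As a rough usage sketch (not part of the module source), the GROUP_CONCAT handling above can be exercised through the public API; the dialect and query below are only illustrative:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one(
        "SELECT GROUP_CONCAT(DISTINCT a ORDER BY b SEPARATOR '|') FROM t",
        read="mysql",
    )
    agg = ast.find(exp.GroupConcat)
    # agg.args["separator"] holds the '|' literal; when an ORDER BY is present it is
    # folded into agg.this, as done in _parse_group_concat above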
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
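A minimal sketch of the effect, assuming the default dialect routes MOD through this builder (the exact output string may differ across sqlglot versions):

    import sqlglot

    # The binary operand gets parenthesized so operator precedence is preserved
    print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])  # e.g. SELECT (a + 1) % 7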
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
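For illustration only, the builder can be invoked directly; the dialect choice here is arbitrary, and whether bracket_notation gets set depends on that dialect's HAS_DISTINCT_ARRAY_CONSTRUCTORS flag:

    from sqlglot.dialects import Dialect

    arr = build_array_constructor(
        exp_class=exp.Array,
        args=[exp.Literal.number(1), exp.Literal.number(2)],
        bracket_kind=TokenType.L_BRACKET,
        dialect=Dialect.get_or_raise("duckdb"),
    )
    # arr is exp.Array(expressions=[1, 2])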
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
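A sketch of the two-argument path (the zone names and column are made up):

    tz = build_convert_timezone(
        [exp.Literal.string("America/New_York"), exp.column("created_at")],
        default_source_tz="UTC",
    )
    # With two args the first is the target zone, so this yields roughly:
    # ConvertTimezone(source_tz='UTC', target_tz='America/New_York', timestamp=created_at)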
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 
TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *CREATABLES, 574 *SUBQUERY_PREDICATES, 575 *TYPE_TOKENS, 576 *NO_PAREN_FUNCTIONS, 577 } 578 ID_VAR_TOKENS.remove(TokenType.UNION) 579 580 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 581 TokenType.ANTI, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 TokenType.INDEX, 623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.UTC_DATE, 647 TokenType.UTC_TIME, 648 TokenType.WINDOW, 649 TokenType.XOR, 650 *TYPE_TOKENS, 651 *SUBQUERY_PREDICATES, 652 } 653 654 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.AND: exp.And, 656 } 657 658 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.COLON_EQ: exp.PropertyEQ, 660 } 661 662 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 663 TokenType.OR: exp.Or, 664 } 665 666 EQUALITY = { 667 TokenType.EQ: exp.EQ, 668 TokenType.NEQ: exp.NEQ, 669 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 670 } 671 672 COMPARISON = { 673 TokenType.GT: exp.GT, 674 TokenType.GTE: exp.GTE, 675 TokenType.LT: exp.LT, 676 TokenType.LTE: exp.LTE, 677 } 678 679 BITWISE = { 680 TokenType.AMP: exp.BitwiseAnd, 681 TokenType.CARET: exp.BitwiseXor, 682 TokenType.PIPE: exp.BitwiseOr, 683 } 684 685 TERM = { 686 TokenType.DASH: exp.Sub, 687 TokenType.PLUS: exp.Add, 688 TokenType.MOD: exp.Mod, 689 TokenType.COLLATE: exp.Collate, 690 } 691 692 FACTOR = { 693 TokenType.DIV: exp.IntDiv, 694 TokenType.LR_ARROW: exp.Distance, 695 TokenType.SLASH: exp.Div, 696 TokenType.STAR: exp.Mul, 697 } 698 699 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 700 701 TIMES = { 702 TokenType.TIME, 703 TokenType.TIMETZ, 704 } 705 706 TIMESTAMPS = { 707 TokenType.TIMESTAMP, 708 TokenType.TIMESTAMPNTZ, 709 TokenType.TIMESTAMPTZ, 710 TokenType.TIMESTAMPLTZ, 711 *TIMES, 712 } 713 714 SET_OPERATIONS = { 715 TokenType.UNION, 716 TokenType.INTERSECT, 717 TokenType.EXCEPT, 718 } 719 720 JOIN_METHODS = { 721 TokenType.ASOF, 722 TokenType.NATURAL, 723 TokenType.POSITIONAL, 724 } 725 726 JOIN_SIDES = { 727 TokenType.LEFT, 728 TokenType.RIGHT, 729 TokenType.FULL, 730 } 731 732 JOIN_KINDS = { 
733 TokenType.ANTI, 734 TokenType.CROSS, 735 TokenType.INNER, 736 TokenType.OUTER, 737 TokenType.SEMI, 738 TokenType.STRAIGHT_JOIN, 739 } 740 741 JOIN_HINTS: t.Set[str] = set() 742 743 LAMBDAS = { 744 TokenType.ARROW: lambda self, expressions: self.expression( 745 exp.Lambda, 746 this=self._replace_lambda( 747 self._parse_assignment(), 748 expressions, 749 ), 750 expressions=expressions, 751 ), 752 TokenType.FARROW: lambda self, expressions: self.expression( 753 exp.Kwarg, 754 this=exp.var(expressions[0].name), 755 expression=self._parse_assignment(), 756 ), 757 } 758 759 COLUMN_OPERATORS = { 760 TokenType.DOT: None, 761 TokenType.DOTCOLON: lambda self, this, to: self.expression( 762 exp.JSONCast, 763 this=this, 764 to=to, 765 ), 766 TokenType.DCOLON: lambda self, this, to: self.build_cast( 767 strict=self.STRICT_CAST, this=this, to=to 768 ), 769 TokenType.ARROW: lambda self, this, path: self.expression( 770 exp.JSONExtract, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.DARROW: lambda self, this, path: self.expression( 776 exp.JSONExtractScalar, 777 this=this, 778 expression=self.dialect.to_json_path(path), 779 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 780 ), 781 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 782 exp.JSONBExtract, 783 this=this, 784 expression=path, 785 ), 786 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 787 exp.JSONBExtractScalar, 788 this=this, 789 expression=path, 790 ), 791 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 792 exp.JSONBContains, 793 this=this, 794 expression=key, 795 ), 796 } 797 798 CAST_COLUMN_OPERATORS = { 799 TokenType.DOTCOLON, 800 TokenType.DCOLON, 801 } 802 803 EXPRESSION_PARSERS = { 804 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 805 exp.Column: lambda self: self._parse_column(), 806 exp.Condition: lambda self: self._parse_assignment(), 807 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 808 exp.Expression: lambda self: self._parse_expression(), 809 exp.From: lambda self: self._parse_from(joins=True), 810 exp.Group: lambda self: self._parse_group(), 811 exp.Having: lambda self: self._parse_having(), 812 exp.Hint: lambda self: self._parse_hint_body(), 813 exp.Identifier: lambda self: self._parse_id_var(), 814 exp.Join: lambda self: self._parse_join(), 815 exp.Lambda: lambda self: self._parse_lambda(), 816 exp.Lateral: lambda self: self._parse_lateral(), 817 exp.Limit: lambda self: self._parse_limit(), 818 exp.Offset: lambda self: self._parse_offset(), 819 exp.Order: lambda self: self._parse_order(), 820 exp.Ordered: lambda self: self._parse_ordered(), 821 exp.Properties: lambda self: self._parse_properties(), 822 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 823 exp.Qualify: lambda self: self._parse_qualify(), 824 exp.Returning: lambda self: self._parse_returning(), 825 exp.Select: lambda self: self._parse_select(), 826 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 827 exp.Table: lambda self: self._parse_table_parts(), 828 exp.TableAlias: lambda self: self._parse_table_alias(), 829 exp.Tuple: lambda self: self._parse_value(values=False), 830 exp.Whens: lambda self: self._parse_when_matched(), 831 exp.Where: lambda self: self._parse_where(), 832 exp.Window: lambda self: self._parse_named_window(), 833 exp.With: lambda self: self._parse_with(), 834 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 835 } 836 837 STATEMENT_PARSERS = { 838 TokenType.ALTER: lambda self: self._parse_alter(), 839 TokenType.ANALYZE: lambda self: self._parse_analyze(), 840 TokenType.BEGIN: lambda self: self._parse_transaction(), 841 TokenType.CACHE: lambda self: self._parse_cache(), 842 TokenType.COMMENT: lambda self: self._parse_comment(), 843 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 844 TokenType.COPY: lambda self: self._parse_copy(), 845 TokenType.CREATE: lambda self: self._parse_create(), 846 TokenType.DELETE: lambda self: self._parse_delete(), 847 TokenType.DESC: lambda self: self._parse_describe(), 848 TokenType.DESCRIBE: lambda self: self._parse_describe(), 849 TokenType.DROP: lambda self: self._parse_drop(), 850 TokenType.GRANT: lambda self: self._parse_grant(), 851 TokenType.REVOKE: lambda self: self._parse_revoke(), 852 TokenType.INSERT: lambda self: self._parse_insert(), 853 TokenType.KILL: lambda self: self._parse_kill(), 854 TokenType.LOAD: lambda self: self._parse_load(), 855 TokenType.MERGE: lambda self: self._parse_merge(), 856 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 857 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 858 TokenType.REFRESH: lambda self: self._parse_refresh(), 859 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 860 TokenType.SET: lambda self: self._parse_set(), 861 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 862 TokenType.UNCACHE: lambda self: self._parse_uncache(), 863 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 864 TokenType.UPDATE: lambda self: self._parse_update(), 865 TokenType.USE: lambda self: self._parse_use(), 866 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 867 } 868 869 UNARY_PARSERS = { 870 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 871 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 872 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 873 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 874 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 875 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 876 } 877 878 STRING_PARSERS = { 879 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 880 exp.RawString, this=token.text 881 ), 882 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 883 exp.National, this=token.text 884 ), 885 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 886 TokenType.STRING: lambda self, token: self.expression( 887 exp.Literal, this=token.text, is_string=True 888 ), 889 TokenType.UNICODE_STRING: lambda self, token: self.expression( 890 exp.UnicodeString, 891 this=token.text, 892 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 893 ), 894 } 895 896 NUMERIC_PARSERS = { 897 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 898 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 899 TokenType.HEX_STRING: lambda self, token: self.expression( 900 exp.HexString, 901 this=token.text, 902 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 903 ), 904 TokenType.NUMBER: lambda self, token: self.expression( 905 exp.Literal, this=token.text, is_string=False 906 ), 907 } 908 909 
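These class-level tables (FUNCTIONS, STATEMENT_PARSERS, STRING_PARSERS, NUMERIC_PARSERS, and so on) are the usual extension points: a dialect's parser subclasses Parser and merges its own entries into the inherited dictionaries. A simplified, hypothetical sketch (the function name is made up):

    from sqlglot import exp, parser

    class MyDialectParser(parser.Parser):
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # Map a dialect-specific spelling onto an existing expression type
            "STR_POS": exp.StrPosition.from_arg_list,
        }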
PRIMARY_PARSERS = { 910 **STRING_PARSERS, 911 **NUMERIC_PARSERS, 912 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 913 TokenType.NULL: lambda self, _: self.expression(exp.Null), 914 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 915 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 916 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 917 TokenType.STAR: lambda self, _: self._parse_star_ops(), 918 } 919 920 PLACEHOLDER_PARSERS = { 921 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 922 TokenType.PARAMETER: lambda self: self._parse_parameter(), 923 TokenType.COLON: lambda self: ( 924 self.expression(exp.Placeholder, this=self._prev.text) 925 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 926 else None 927 ), 928 } 929 930 RANGE_PARSERS = { 931 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 932 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 933 TokenType.GLOB: binary_range_parser(exp.Glob), 934 TokenType.ILIKE: binary_range_parser(exp.ILike), 935 TokenType.IN: lambda self, this: self._parse_in(this), 936 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 937 TokenType.IS: lambda self, this: self._parse_is(this), 938 TokenType.LIKE: binary_range_parser(exp.Like), 939 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 940 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 941 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 942 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 943 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 944 } 945 946 PIPE_SYNTAX_TRANSFORM_PARSERS = { 947 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 948 "AS": lambda self, query: self._build_pipe_cte( 949 query, [exp.Star()], self._parse_table_alias() 950 ), 951 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 952 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 953 "ORDER BY": lambda self, query: query.order_by( 954 self._parse_order(), append=False, copy=False 955 ), 956 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 957 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 958 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 959 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 960 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 961 } 962 963 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 964 "ALLOWED_VALUES": lambda self: self.expression( 965 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 966 ), 967 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 968 "AUTO": lambda self: self._parse_auto_property(), 969 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 970 "BACKUP": lambda self: self.expression( 971 exp.BackupProperty, this=self._parse_var(any_token=True) 972 ), 973 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 974 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 975 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 976 "CHECKSUM": lambda self: self._parse_checksum(), 977 "CLUSTER BY": lambda self: self._parse_cluster(), 978 "CLUSTERED": lambda self: self._parse_clustered_by(), 979 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 
980 exp.CollateProperty, **kwargs 981 ), 982 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 983 "CONTAINS": lambda self: self._parse_contains_property(), 984 "COPY": lambda self: self._parse_copy_property(), 985 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 986 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 987 "DEFINER": lambda self: self._parse_definer(), 988 "DETERMINISTIC": lambda self: self.expression( 989 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 990 ), 991 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 992 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 993 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 994 "DISTKEY": lambda self: self._parse_distkey(), 995 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 996 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 997 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 998 "ENVIRONMENT": lambda self: self.expression( 999 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1000 ), 1001 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1002 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1003 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1004 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1005 "FREESPACE": lambda self: self._parse_freespace(), 1006 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1007 "HEAP": lambda self: self.expression(exp.HeapProperty), 1008 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1009 "IMMUTABLE": lambda self: self.expression( 1010 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1011 ), 1012 "INHERITS": lambda self: self.expression( 1013 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1014 ), 1015 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1016 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1017 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1018 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1019 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1020 "LIKE": lambda self: self._parse_create_like(), 1021 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1022 "LOCK": lambda self: self._parse_locking(), 1023 "LOCKING": lambda self: self._parse_locking(), 1024 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1025 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1026 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1027 "MODIFIES": lambda self: self._parse_modifies_property(), 1028 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1029 "NO": lambda self: self._parse_no_property(), 1030 "ON": lambda self: self._parse_on_property(), 1031 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1032 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1033 "PARTITION": lambda self: self._parse_partitioned_of(), 1034 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1035 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1036 "PARTITIONED_BY": lambda self: 
self._parse_partitioned_by(), 1037 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1038 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1039 "READS": lambda self: self._parse_reads_property(), 1040 "REMOTE": lambda self: self._parse_remote_with_connection(), 1041 "RETURNS": lambda self: self._parse_returns(), 1042 "STRICT": lambda self: self.expression(exp.StrictProperty), 1043 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1044 "ROW": lambda self: self._parse_row(), 1045 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1046 "SAMPLE": lambda self: self.expression( 1047 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1048 ), 1049 "SECURE": lambda self: self.expression(exp.SecureProperty), 1050 "SECURITY": lambda self: self._parse_security(), 1051 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1052 "SETTINGS": lambda self: self._parse_settings_property(), 1053 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1054 "SORTKEY": lambda self: self._parse_sortkey(), 1055 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1056 "STABLE": lambda self: self.expression( 1057 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1058 ), 1059 "STORED": lambda self: self._parse_stored(), 1060 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1061 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1062 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1063 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1064 "TO": lambda self: self._parse_to_table(), 1065 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1066 "TRANSFORM": lambda self: self.expression( 1067 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1068 ), 1069 "TTL": lambda self: self._parse_ttl(), 1070 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1071 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1072 "VOLATILE": lambda self: self._parse_volatile_property(), 1073 "WITH": lambda self: self._parse_with_property(), 1074 } 1075 1076 CONSTRAINT_PARSERS = { 1077 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1078 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1079 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1080 "CHARACTER SET": lambda self: self.expression( 1081 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1082 ), 1083 "CHECK": lambda self: self.expression( 1084 exp.CheckColumnConstraint, 1085 this=self._parse_wrapped(self._parse_assignment), 1086 enforced=self._match_text_seq("ENFORCED"), 1087 ), 1088 "COLLATE": lambda self: self.expression( 1089 exp.CollateColumnConstraint, 1090 this=self._parse_identifier() or self._parse_column(), 1091 ), 1092 "COMMENT": lambda self: self.expression( 1093 exp.CommentColumnConstraint, this=self._parse_string() 1094 ), 1095 "COMPRESS": lambda self: self._parse_compress(), 1096 "CLUSTERED": lambda self: self.expression( 1097 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1098 ), 1099 "NONCLUSTERED": lambda self: self.expression( 1100 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1101 ), 1102 "DEFAULT": lambda self: self.expression( 1103 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1104 ), 
1105 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1106 "EPHEMERAL": lambda self: self.expression( 1107 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1108 ), 1109 "EXCLUDE": lambda self: self.expression( 1110 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1111 ), 1112 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1113 "FORMAT": lambda self: self.expression( 1114 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1115 ), 1116 "GENERATED": lambda self: self._parse_generated_as_identity(), 1117 "IDENTITY": lambda self: self._parse_auto_increment(), 1118 "INLINE": lambda self: self._parse_inline(), 1119 "LIKE": lambda self: self._parse_create_like(), 1120 "NOT": lambda self: self._parse_not_constraint(), 1121 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1122 "ON": lambda self: ( 1123 self._match(TokenType.UPDATE) 1124 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1125 ) 1126 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1127 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1128 "PERIOD": lambda self: self._parse_period_for_system_time(), 1129 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1130 "REFERENCES": lambda self: self._parse_references(match=False), 1131 "TITLE": lambda self: self.expression( 1132 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1133 ), 1134 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1135 "UNIQUE": lambda self: self._parse_unique(), 1136 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1137 "WATERMARK": lambda self: self.expression( 1138 exp.WatermarkColumnConstraint, 1139 this=self._match(TokenType.FOR) and self._parse_column(), 1140 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1141 ), 1142 "WITH": lambda self: self.expression( 1143 exp.Properties, expressions=self._parse_wrapped_properties() 1144 ), 1145 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1146 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1147 } 1148 1149 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1150 if not self._match(TokenType.L_PAREN, advance=False): 1151 # Partitioning by bucket or truncate follows the syntax: 1152 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1153 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1154 self._retreat(self._index - 1) 1155 return None 1156 1157 klass = ( 1158 exp.PartitionedByBucket 1159 if self._prev.text.upper() == "BUCKET" 1160 else exp.PartitionByTruncate 1161 ) 1162 1163 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1164 this, expression = seq_get(args, 0), seq_get(args, 1) 1165 1166 if isinstance(this, exp.Literal): 1167 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1168 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1169 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1170 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1171 # 1172 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1173 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1174 this, expression = expression, this 1175 1176 return self.expression(klass, this=this, expression=expression) 1177 1178 ALTER_PARSERS = { 1179 "ADD": lambda self: self._parse_alter_table_add(), 1180 "AS": lambda self: self._parse_select(), 1181 "ALTER": lambda self: self._parse_alter_table_alter(), 1182 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1183 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1184 "DROP": lambda self: self._parse_alter_table_drop(), 1185 "RENAME": lambda self: self._parse_alter_table_rename(), 1186 "SET": lambda self: self._parse_alter_table_set(), 1187 "SWAP": lambda self: self.expression( 1188 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1189 ), 1190 } 1191 1192 ALTER_ALTER_PARSERS = { 1193 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1194 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1195 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1196 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1197 } 1198 1199 SCHEMA_UNNAMED_CONSTRAINTS = { 1200 "CHECK", 1201 "EXCLUDE", 1202 "FOREIGN KEY", 1203 "LIKE", 1204 "PERIOD", 1205 "PRIMARY KEY", 1206 "UNIQUE", 1207 "WATERMARK", 1208 "BUCKET", 1209 "TRUNCATE", 1210 } 1211 1212 NO_PAREN_FUNCTION_PARSERS = { 1213 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1214 "CASE": lambda self: self._parse_case(), 1215 "CONNECT_BY_ROOT": lambda self: self.expression( 1216 exp.ConnectByRoot, this=self._parse_column() 1217 ), 1218 "IF": lambda self: self._parse_if(), 1219 } 1220 1221 INVALID_FUNC_NAME_TOKENS = { 1222 TokenType.IDENTIFIER, 1223 TokenType.STRING, 1224 } 1225 1226 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1227 1228 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1229 1230 FUNCTION_PARSERS = { 1231 **{ 1232 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1233 }, 1234 **{ 1235 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1236 }, 1237 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1238 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1239 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1240 "DECODE": lambda self: self._parse_decode(), 1241 "EXTRACT": lambda self: self._parse_extract(), 1242 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1243 "GAP_FILL": lambda self: self._parse_gap_fill(), 1244 "JSON_OBJECT": lambda self: self._parse_json_object(), 1245 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1246 "JSON_TABLE": lambda self: self._parse_json_table(), 1247 "MATCH": lambda self: self._parse_match_against(), 1248 "NORMALIZE": lambda self: self._parse_normalize(), 1249 "OPENJSON": lambda self: self._parse_open_json(), 1250 "OVERLAY": lambda self: self._parse_overlay(), 1251 "POSITION": lambda self: self._parse_position(), 1252 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1253 "STRING_AGG": lambda self: self._parse_string_agg(), 1254 "SUBSTRING": lambda self: self._parse_substring(), 1255 "TRIM": lambda self: self._parse_trim(), 1256 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1257 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1258 "XMLELEMENT": lambda self: self.expression( 1259 exp.XMLElement, 1260 this=self._match_text_seq("NAME") and self._parse_id_var(), 1261 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1262 ), 1263 "XMLTABLE": lambda self: self._parse_xml_table(), 1264 } 1265 1266 QUERY_MODIFIER_PARSERS = { 1267 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1268 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1269 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1270 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1271 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1272 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1273 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1274 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1275 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1276 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1277 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1278 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1279 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1280 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1281 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1282 TokenType.CLUSTER_BY: lambda self: ( 1283 "cluster", 1284 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1285 ), 1286 TokenType.DISTRIBUTE_BY: lambda self: ( 1287 "distribute", 1288 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1289 ), 1290 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1291 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1292 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1293 } 1294 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1295 1296 SET_PARSERS = { 1297 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1298 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1299 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1300 "TRANSACTION": lambda self: self._parse_set_transaction(), 1301 } 1302 1303 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1304 1305 TYPE_LITERAL_PARSERS = { 1306 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1307 } 1308 1309 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1310 1311 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1312 1313 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1314 1315 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1316 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1317 "ISOLATION": ( 1318 ("LEVEL", "REPEATABLE", "READ"), 1319 ("LEVEL", "READ", "COMMITTED"), 1320 ("LEVEL", "READ", "UNCOMITTED"), 1321 ("LEVEL", "SERIALIZABLE"), 1322 ), 1323 "READ": ("WRITE", "ONLY"), 1324 } 1325 1326 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1327 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1328 ) 1329 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1330 1331 CREATE_SEQUENCE: OPTIONS_TYPE = { 1332 "SCALE": ("EXTEND", "NOEXTEND"), 1333 "SHARD": ("EXTEND", "NOEXTEND"), 1334 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1335 **dict.fromkeys( 1336 ( 1337 "SESSION", 1338 "GLOBAL", 1339 "KEEP", 1340 "NOKEEP", 1341 "ORDER", 1342 "NOORDER", 1343 "NOCACHE", 1344 "CYCLE", 1345 "NOCYCLE", 1346 "NOMINVALUE", 1347 "NOMAXVALUE", 1348 "NOSCALE", 1349 "NOSHARD", 1350 ), 1351 tuple(), 1352 ), 1353 } 1354 1355 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1356 1357 USABLES: OPTIONS_TYPE = dict.fromkeys( 1358 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1359 ) 1360 1361 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1362 1363 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1364 "TYPE": ("EVOLUTION",), 1365 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1366 } 1367 1368 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1369 1370 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1371 1372 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1373 "NOT": ("ENFORCED",), 1374 "MATCH": ( 1375 "FULL", 1376 "PARTIAL", 1377 "SIMPLE", 1378 ), 1379 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1380 "USING": ( 1381 "BTREE", 1382 "HASH", 1383 ), 1384 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1385 } 1386 1387 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1388 "NO": ("OTHERS",), 1389 "CURRENT": ("ROW",), 1390 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1391 } 1392 1393 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1394 1395 CLONE_KEYWORDS = {"CLONE", "COPY"} 1396 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1397 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1398 1399 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1400 1401 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1402 1403 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1404 1405 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1406 1407 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1408 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1409 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1410 1411 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1412 1413 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1414 1415 ADD_CONSTRAINT_TOKENS = { 1416 TokenType.CONSTRAINT, 1417 TokenType.FOREIGN_KEY, 1418 TokenType.INDEX, 1419 TokenType.KEY, 1420 TokenType.PRIMARY_KEY, 1421 TokenType.UNIQUE, 1422 } 1423 1424 DISTINCT_TOKENS = {TokenType.DISTINCT} 1425 1426 NULL_TOKENS = {TokenType.NULL} 1427 1428 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1429 1430 
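The OPTIONS_TYPE tables above map a leading keyword to the keyword sequences that may follow it; TRANSACTION_CHARACTERISTICS, for example, drives parsing of SET TRANSACTION options. A hedged illustration (default dialect assumed):

    import sqlglot

    # 'ISOLATION' + ('LEVEL', 'READ', 'COMMITTED') matches one entry in
    # TRANSACTION_CHARACTERISTICS, so this should parse into an exp.Set node
    ast = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")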
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1431 1432 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1433 1434 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1435 1436 ODBC_DATETIME_LITERALS = { 1437 "d": exp.Date, 1438 "t": exp.Time, 1439 "ts": exp.Timestamp, 1440 } 1441 1442 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1443 1444 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1445 1446 # The style options for the DESCRIBE statement 1447 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1448 1449 # The style options for the ANALYZE statement 1450 ANALYZE_STYLES = { 1451 "BUFFER_USAGE_LIMIT", 1452 "FULL", 1453 "LOCAL", 1454 "NO_WRITE_TO_BINLOG", 1455 "SAMPLE", 1456 "SKIP_LOCKED", 1457 "VERBOSE", 1458 } 1459 1460 ANALYZE_EXPRESSION_PARSERS = { 1461 "ALL": lambda self: self._parse_analyze_columns(), 1462 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1463 "DELETE": lambda self: self._parse_analyze_delete(), 1464 "DROP": lambda self: self._parse_analyze_histogram(), 1465 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1466 "LIST": lambda self: self._parse_analyze_list(), 1467 "PREDICATE": lambda self: self._parse_analyze_columns(), 1468 "UPDATE": lambda self: self._parse_analyze_histogram(), 1469 "VALIDATE": lambda self: self._parse_analyze_validate(), 1470 } 1471 1472 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1473 1474 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1475 1476 OPERATION_MODIFIERS: t.Set[str] = set() 1477 1478 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1479 1480 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1481 1482 STRICT_CAST = True 1483 1484 PREFIXED_PIVOT_COLUMNS = False 1485 IDENTIFY_PIVOT_STRINGS = False 1486 1487 LOG_DEFAULTS_TO_LN = False 1488 1489 # Whether the table sample clause expects CSV syntax 1490 TABLESAMPLE_CSV = False 1491 1492 # The default method used for table sampling 1493 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1494 1495 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1496 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1497 1498 # Whether the TRIM function expects the characters to trim as its first argument 1499 TRIM_PATTERN_FIRST = False 1500 1501 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1502 STRING_ALIASES = False 1503 1504 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1505 MODIFIERS_ATTACHED_TO_SET_OP = True 1506 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1507 1508 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1509 NO_PAREN_IF_COMMANDS = True 1510 1511 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1512 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1513 1514 # Whether the `:` operator is used to extract a value from a VARIANT column 1515 COLON_IS_VARIANT_EXTRACT = False 1516 1517 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1518 # If this is True and '(' is not found, the keyword will be treated as an identifier 1519 VALUES_FOLLOWED_BY_PAREN = True 1520 1521 # Whether implicit unnesting is supported, e.g. 
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
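
    # Illustrative note (added by the editor, not part of the original module): a Parser is
    # normally constructed and driven by a Dialect, but it can also be used directly with tokens
    # produced by a compatible tokenizer. A minimal sketch, assuming the default dialect and
    # tokenizer:
    #
    #     >>> from sqlglot import parser, tokens
    #     >>> p = parser.Parser()
    #     >>> [e.sql() for e in p.parse(tokens.Tokenizer().tokenize("SELECT 1; SELECT 2"))]
    #     ['SELECT 1', 'SELECT 2']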
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
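
    # Illustrative note (added by the editor, not part of the original module): parse_into tries
    # each requested expression type in turn and raises a merged ParseError if none succeeds. A
    # minimal sketch via the public helper that routes through it, assuming the default dialect:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("SELECT a FROM t", into=exp.Select), exp.Select)
    #     True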
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
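
    # Illustrative note (added by the editor, not part of the original module): statements whose
    # leading token is in the tokenizer's COMMANDS set fall back to an opaque exp.Command node
    # (with _warn_unsupported logging a warning) instead of failing. A minimal sketch, assuming
    # the default dialect handles SHOW this way:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("SHOW TABLES"), exp.Command)
    #     True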
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1929 1930 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1931 start = self._prev 1932 temporary = self._match(TokenType.TEMPORARY) 1933 materialized = self._match_text_seq("MATERIALIZED") 1934 1935 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1936 if not kind: 1937 return self._parse_as_command(start) 1938 1939 concurrently = self._match_text_seq("CONCURRENTLY") 1940 if_exists = exists or self._parse_exists() 1941 1942 if kind == "COLUMN": 1943 this = self._parse_column() 1944 else: 1945 this = self._parse_table_parts( 1946 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1947 ) 1948 1949 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1950 1951 if self._match(TokenType.L_PAREN, advance=False): 1952 expressions = self._parse_wrapped_csv(self._parse_types) 1953 else: 1954 expressions = None 1955 1956 return self.expression( 1957 exp.Drop, 1958 exists=if_exists, 1959 this=this, 1960 expressions=expressions, 1961 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1962 temporary=temporary, 1963 materialized=materialized, 1964 cascade=self._match_text_seq("CASCADE"), 1965 constraints=self._match_text_seq("CONSTRAINTS"), 1966 purge=self._match_text_seq("PURGE"), 1967 cluster=cluster, 1968 concurrently=concurrently, 1969 ) 1970 1971 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1972 return ( 1973 self._match_text_seq("IF") 1974 and (not not_ or self._match(TokenType.NOT)) 1975 and self._match(TokenType.EXISTS) 1976 ) 1977 1978 def _parse_create(self) -> exp.Create | exp.Command: 1979 # Note: this can't be None because we've matched a statement parser 1980 start = self._prev 1981 1982 replace = ( 1983 start.token_type == TokenType.REPLACE 1984 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1985 or self._match_pair(TokenType.OR, TokenType.ALTER) 1986 ) 1987 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1988 1989 unique = self._match(TokenType.UNIQUE) 1990 1991 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1992 clustered = True 1993 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1994 "COLUMNSTORE" 1995 ): 1996 clustered = False 1997 else: 1998 clustered = None 1999 2000 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2001 self._advance() 2002 2003 properties = None 2004 create_token = self._match_set(self.CREATABLES) and self._prev 2005 2006 if not create_token: 2007 # exp.Properties.Location.POST_CREATE 2008 properties = self._parse_properties() 2009 create_token = self._match_set(self.CREATABLES) and self._prev 2010 2011 if not properties or not create_token: 2012 return self._parse_as_command(start) 2013 2014 concurrently = self._match_text_seq("CONCURRENTLY") 2015 exists = self._parse_exists(not_=True) 2016 this = None 2017 expression: t.Optional[exp.Expression] = None 2018 indexes = None 2019 no_schema_binding = None 2020 begin = None 2021 end = None 2022 clone = None 2023 2024 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2025 nonlocal properties 2026 if properties and temp_props: 2027 properties.expressions.extend(temp_props.expressions) 2028 elif temp_props: 2029 properties = temp_props 2030 2031 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2032 this = self._parse_user_defined_function(kind=create_token.token_type) 2033 2034 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2035 extend_props(self._parse_properties()) 2036 2037 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2038 extend_props(self._parse_properties()) 2039 2040 if not expression: 2041 if self._match(TokenType.COMMAND): 2042 expression = self._parse_as_command(self._prev) 2043 else: 2044 begin = self._match(TokenType.BEGIN) 2045 return_ = self._match_text_seq("RETURN") 2046 2047 if self._match(TokenType.STRING, advance=False): 2048 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2049 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2050 expression = self._parse_string() 2051 extend_props(self._parse_properties()) 2052 else: 2053 expression = self._parse_user_defined_function_expression() 2054 2055 end = self._match_text_seq("END") 2056 2057 if return_: 2058 expression = self.expression(exp.Return, this=expression) 2059 elif create_token.token_type == TokenType.INDEX: 2060 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2061 if not self._match(TokenType.ON): 2062 index = self._parse_id_var() 2063 anonymous = False 2064 else: 2065 index = None 2066 anonymous = True 2067 2068 this = self._parse_index(index=index, anonymous=anonymous) 2069 elif create_token.token_type in self.DB_CREATABLES: 2070 table_parts = self._parse_table_parts( 2071 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2072 ) 2073 2074 # exp.Properties.Location.POST_NAME 2075 self._match(TokenType.COMMA) 2076 extend_props(self._parse_properties(before=True)) 2077 2078 this = self._parse_schema(this=table_parts) 2079 2080 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2081 extend_props(self._parse_properties()) 2082 2083 has_alias = self._match(TokenType.ALIAS) 2084 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2085 # exp.Properties.Location.POST_ALIAS 2086 extend_props(self._parse_properties()) 2087 2088 if create_token.token_type == TokenType.SEQUENCE: 2089 expression = self._parse_types() 2090 props = self._parse_properties() 2091 if props: 2092 sequence_props = exp.SequenceProperties() 2093 options = [] 2094 for prop in props: 2095 if isinstance(prop, exp.SequenceProperties): 2096 for arg, value in prop.args.items(): 2097 if arg == "options": 2098 options.extend(value) 2099 else: 2100 sequence_props.set(arg, value) 2101 prop.pop() 2102 2103 if options: 2104 sequence_props.set("options", options) 2105 2106 props.append("expressions", sequence_props) 2107 extend_props(props) 2108 else: 2109 expression = self._parse_ddl_select() 2110 2111 # Some dialects also support using a table as an alias instead of a SELECT. 2112 # Here we fallback to this as an alternative. 
2113 if not expression and has_alias: 2114 expression = self._try_parse(self._parse_table_parts) 2115 2116 if create_token.token_type == TokenType.TABLE: 2117 # exp.Properties.Location.POST_EXPRESSION 2118 extend_props(self._parse_properties()) 2119 2120 indexes = [] 2121 while True: 2122 index = self._parse_index() 2123 2124 # exp.Properties.Location.POST_INDEX 2125 extend_props(self._parse_properties()) 2126 if not index: 2127 break 2128 else: 2129 self._match(TokenType.COMMA) 2130 indexes.append(index) 2131 elif create_token.token_type == TokenType.VIEW: 2132 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2133 no_schema_binding = True 2134 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2135 extend_props(self._parse_properties()) 2136 2137 shallow = self._match_text_seq("SHALLOW") 2138 2139 if self._match_texts(self.CLONE_KEYWORDS): 2140 copy = self._prev.text.lower() == "copy" 2141 clone = self.expression( 2142 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2143 ) 2144 2145 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2146 return self._parse_as_command(start) 2147 2148 create_kind_text = create_token.text.upper() 2149 return self.expression( 2150 exp.Create, 2151 this=this, 2152 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2153 replace=replace, 2154 refresh=refresh, 2155 unique=unique, 2156 expression=expression, 2157 exists=exists, 2158 properties=properties, 2159 indexes=indexes, 2160 no_schema_binding=no_schema_binding, 2161 begin=begin, 2162 end=end, 2163 clone=clone, 2164 concurrently=concurrently, 2165 clustered=clustered, 2166 ) 2167 2168 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2169 seq = exp.SequenceProperties() 2170 2171 options = [] 2172 index = self._index 2173 2174 while self._curr: 2175 self._match(TokenType.COMMA) 2176 if self._match_text_seq("INCREMENT"): 2177 self._match_text_seq("BY") 2178 self._match_text_seq("=") 2179 seq.set("increment", self._parse_term()) 2180 elif self._match_text_seq("MINVALUE"): 2181 seq.set("minvalue", self._parse_term()) 2182 elif self._match_text_seq("MAXVALUE"): 2183 seq.set("maxvalue", self._parse_term()) 2184 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2185 self._match_text_seq("=") 2186 seq.set("start", self._parse_term()) 2187 elif self._match_text_seq("CACHE"): 2188 # T-SQL allows empty CACHE which is initialized dynamically 2189 seq.set("cache", self._parse_number() or True) 2190 elif self._match_text_seq("OWNED", "BY"): 2191 # "OWNED BY NONE" is the default 2192 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2193 else: 2194 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2195 if opt: 2196 options.append(opt) 2197 else: 2198 break 2199 2200 seq.set("options", options if options else None) 2201 return None if self._index == index else seq 2202 2203 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2204 # only used for teradata currently 2205 self._match(TokenType.COMMA) 2206 2207 kwargs = { 2208 "no": self._match_text_seq("NO"), 2209 "dual": self._match_text_seq("DUAL"), 2210 "before": self._match_text_seq("BEFORE"), 2211 "default": self._match_text_seq("DEFAULT"), 2212 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2213 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2214 "after": self._match_text_seq("AFTER"), 2215 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2216 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2217 } 2218 2219 if self._match_texts(self.PROPERTY_PARSERS): 2220 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2221 try: 2222 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2223 except TypeError: 2224 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2225 2226 return None 2227 2228 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2229 return self._parse_wrapped_csv(self._parse_property) 2230 2231 def _parse_property(self) -> t.Optional[exp.Expression]: 2232 if self._match_texts(self.PROPERTY_PARSERS): 2233 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2234 2235 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2236 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2237 2238 if self._match_text_seq("COMPOUND", "SORTKEY"): 2239 return self._parse_sortkey(compound=True) 2240 2241 if self._match_text_seq("SQL", "SECURITY"): 2242 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2243 2244 index = self._index 2245 2246 seq_props = self._parse_sequence_properties() 2247 if seq_props: 2248 return seq_props 2249 2250 self._retreat(index) 2251 key = self._parse_column() 2252 2253 if not self._match(TokenType.EQ): 2254 self._retreat(index) 2255 return None 2256 2257 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2258 if isinstance(key, exp.Column): 2259 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2260 2261 value = self._parse_bitwise() or self._parse_var(any_token=True) 2262 2263 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2264 if isinstance(value, exp.Column): 2265 value = exp.var(value.name) 2266 2267 return self.expression(exp.Property, this=key, value=value) 2268 2269 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2270 if self._match_text_seq("BY"): 2271 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2272 2273 self._match(TokenType.ALIAS) 2274 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2275 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2276 2277 return self.expression( 2278 exp.FileFormatProperty, 2279 this=( 2280 self.expression( 2281 exp.InputOutputFormat, 2282 input_format=input_format, 2283 output_format=output_format, 2284 ) 2285 if input_format or output_format 2286 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2287 ), 2288 hive_format=True, 2289 ) 2290 2291 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2292 field = self._parse_field() 2293 if isinstance(field, exp.Identifier) and not field.quoted: 2294 field = exp.var(field) 2295 2296 return field 2297 2298 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2299 self._match(TokenType.EQ) 2300 self._match(TokenType.ALIAS) 2301 2302 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2303 2304 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2305 properties = [] 2306 while True: 2307 if before: 2308 prop = self._parse_property_before() 2309 else: 2310 prop = self._parse_property() 2311 if not prop: 2312 break 2313 for p in ensure_list(prop): 2314 properties.append(p) 
2315 2316 if properties: 2317 return self.expression(exp.Properties, expressions=properties) 2318 2319 return None 2320 2321 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2322 return self.expression( 2323 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2324 ) 2325 2326 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2327 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2328 security_specifier = self._prev.text.upper() 2329 return self.expression(exp.SecurityProperty, this=security_specifier) 2330 return None 2331 2332 def _parse_settings_property(self) -> exp.SettingsProperty: 2333 return self.expression( 2334 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2335 ) 2336 2337 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2338 if self._index >= 2: 2339 pre_volatile_token = self._tokens[self._index - 2] 2340 else: 2341 pre_volatile_token = None 2342 2343 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2344 return exp.VolatileProperty() 2345 2346 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2347 2348 def _parse_retention_period(self) -> exp.Var: 2349 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2350 number = self._parse_number() 2351 number_str = f"{number} " if number else "" 2352 unit = self._parse_var(any_token=True) 2353 return exp.var(f"{number_str}{unit}") 2354 2355 def _parse_system_versioning_property( 2356 self, with_: bool = False 2357 ) -> exp.WithSystemVersioningProperty: 2358 self._match(TokenType.EQ) 2359 prop = self.expression( 2360 exp.WithSystemVersioningProperty, 2361 **{ # type: ignore 2362 "on": True, 2363 "with": with_, 2364 }, 2365 ) 2366 2367 if self._match_text_seq("OFF"): 2368 prop.set("on", False) 2369 return prop 2370 2371 self._match(TokenType.ON) 2372 if self._match(TokenType.L_PAREN): 2373 while self._curr and not self._match(TokenType.R_PAREN): 2374 if self._match_text_seq("HISTORY_TABLE", "="): 2375 prop.set("this", self._parse_table_parts()) 2376 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2377 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2378 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2379 prop.set("retention_period", self._parse_retention_period()) 2380 2381 self._match(TokenType.COMMA) 2382 2383 return prop 2384 2385 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2386 self._match(TokenType.EQ) 2387 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2388 prop = self.expression(exp.DataDeletionProperty, on=on) 2389 2390 if self._match(TokenType.L_PAREN): 2391 while self._curr and not self._match(TokenType.R_PAREN): 2392 if self._match_text_seq("FILTER_COLUMN", "="): 2393 prop.set("filter_column", self._parse_column()) 2394 elif self._match_text_seq("RETENTION_PERIOD", "="): 2395 prop.set("retention_period", self._parse_retention_period()) 2396 2397 self._match(TokenType.COMMA) 2398 2399 return prop 2400 2401 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2402 kind = "HASH" 2403 expressions: t.Optional[t.List[exp.Expression]] = None 2404 if self._match_text_seq("BY", "HASH"): 2405 expressions = self._parse_wrapped_csv(self._parse_id_var) 2406 elif self._match_text_seq("BY", "RANDOM"): 2407 kind = "RANDOM" 2408 2409 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2410 
buckets: t.Optional[exp.Expression] = None 2411 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2412 buckets = self._parse_number() 2413 2414 return self.expression( 2415 exp.DistributedByProperty, 2416 expressions=expressions, 2417 kind=kind, 2418 buckets=buckets, 2419 order=self._parse_order(), 2420 ) 2421 2422 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2423 self._match_text_seq("KEY") 2424 expressions = self._parse_wrapped_id_vars() 2425 return self.expression(expr_type, expressions=expressions) 2426 2427 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2428 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2429 prop = self._parse_system_versioning_property(with_=True) 2430 self._match_r_paren() 2431 return prop 2432 2433 if self._match(TokenType.L_PAREN, advance=False): 2434 return self._parse_wrapped_properties() 2435 2436 if self._match_text_seq("JOURNAL"): 2437 return self._parse_withjournaltable() 2438 2439 if self._match_texts(self.VIEW_ATTRIBUTES): 2440 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2441 2442 if self._match_text_seq("DATA"): 2443 return self._parse_withdata(no=False) 2444 elif self._match_text_seq("NO", "DATA"): 2445 return self._parse_withdata(no=True) 2446 2447 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2448 return self._parse_serde_properties(with_=True) 2449 2450 if self._match(TokenType.SCHEMA): 2451 return self.expression( 2452 exp.WithSchemaBindingProperty, 2453 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2454 ) 2455 2456 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2457 return self.expression( 2458 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2459 ) 2460 2461 if not self._next: 2462 return None 2463 2464 return self._parse_withisolatedloading() 2465 2466 def _parse_procedure_option(self) -> exp.Expression | None: 2467 if self._match_text_seq("EXECUTE", "AS"): 2468 return self.expression( 2469 exp.ExecuteAsProperty, 2470 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2471 or self._parse_string(), 2472 ) 2473 2474 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2475 2476 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2477 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2478 self._match(TokenType.EQ) 2479 2480 user = self._parse_id_var() 2481 self._match(TokenType.PARAMETER) 2482 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2483 2484 if not user or not host: 2485 return None 2486 2487 return exp.DefinerProperty(this=f"{user}@{host}") 2488 2489 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2490 self._match(TokenType.TABLE) 2491 self._match(TokenType.EQ) 2492 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2493 2494 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2495 return self.expression(exp.LogProperty, no=no) 2496 2497 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2498 return self.expression(exp.JournalProperty, **kwargs) 2499 2500 def _parse_checksum(self) -> exp.ChecksumProperty: 2501 self._match(TokenType.EQ) 2502 2503 on = None 2504 if self._match(TokenType.ON): 2505 on = True 2506 elif self._match_text_seq("OFF"): 2507 on = False 2508 2509 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2510 2511 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2512 return self.expression( 2513 exp.Cluster, 2514 expressions=( 2515 self._parse_wrapped_csv(self._parse_ordered) 2516 if wrapped 2517 else self._parse_csv(self._parse_ordered) 2518 ), 2519 ) 2520 2521 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2522 self._match_text_seq("BY") 2523 2524 self._match_l_paren() 2525 expressions = self._parse_csv(self._parse_column) 2526 self._match_r_paren() 2527 2528 if self._match_text_seq("SORTED", "BY"): 2529 self._match_l_paren() 2530 sorted_by = self._parse_csv(self._parse_ordered) 2531 self._match_r_paren() 2532 else: 2533 sorted_by = None 2534 2535 self._match(TokenType.INTO) 2536 buckets = self._parse_number() 2537 self._match_text_seq("BUCKETS") 2538 2539 return self.expression( 2540 exp.ClusteredByProperty, 2541 expressions=expressions, 2542 sorted_by=sorted_by, 2543 buckets=buckets, 2544 ) 2545 2546 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2547 if not self._match_text_seq("GRANTS"): 2548 self._retreat(self._index - 1) 2549 return None 2550 2551 return self.expression(exp.CopyGrantsProperty) 2552 2553 def _parse_freespace(self) -> exp.FreespaceProperty: 2554 self._match(TokenType.EQ) 2555 return self.expression( 2556 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2557 ) 2558 2559 def _parse_mergeblockratio( 2560 self, no: bool = False, default: bool = False 2561 ) -> exp.MergeBlockRatioProperty: 2562 if self._match(TokenType.EQ): 2563 return self.expression( 2564 exp.MergeBlockRatioProperty, 2565 this=self._parse_number(), 2566 percent=self._match(TokenType.PERCENT), 2567 ) 2568 2569 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2570 2571 def _parse_datablocksize( 2572 self, 2573 default: t.Optional[bool] = None, 2574 minimum: t.Optional[bool] = None, 2575 maximum: t.Optional[bool] = None, 2576 ) -> exp.DataBlocksizeProperty: 2577 self._match(TokenType.EQ) 2578 size = self._parse_number() 2579 2580 units = None 2581 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2582 units = self._prev.text 2583 2584 return self.expression( 2585 exp.DataBlocksizeProperty, 2586 size=size, 2587 units=units, 2588 default=default, 2589 minimum=minimum, 2590 maximum=maximum, 2591 ) 2592 2593 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2594 self._match(TokenType.EQ) 2595 always = self._match_text_seq("ALWAYS") 2596 manual = self._match_text_seq("MANUAL") 2597 never = self._match_text_seq("NEVER") 2598 default = self._match_text_seq("DEFAULT") 2599 2600 autotemp = None 2601 if self._match_text_seq("AUTOTEMP"): 2602 autotemp = self._parse_schema() 2603 2604 return self.expression( 2605 exp.BlockCompressionProperty, 2606 always=always, 2607 manual=manual, 2608 never=never, 2609 default=default, 2610 autotemp=autotemp, 2611 ) 2612 2613 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2614 index = self._index 2615 no = self._match_text_seq("NO") 2616 concurrent = self._match_text_seq("CONCURRENT") 2617 2618 if not self._match_text_seq("ISOLATED", "LOADING"): 2619 self._retreat(index) 2620 return None 2621 2622 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2623 return self.expression( 2624 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2625 ) 2626 2627 def _parse_locking(self) -> exp.LockingProperty: 2628 if self._match(TokenType.TABLE): 2629 kind = "TABLE" 2630 elif 
self._match(TokenType.VIEW): 2631 kind = "VIEW" 2632 elif self._match(TokenType.ROW): 2633 kind = "ROW" 2634 elif self._match_text_seq("DATABASE"): 2635 kind = "DATABASE" 2636 else: 2637 kind = None 2638 2639 if kind in ("DATABASE", "TABLE", "VIEW"): 2640 this = self._parse_table_parts() 2641 else: 2642 this = None 2643 2644 if self._match(TokenType.FOR): 2645 for_or_in = "FOR" 2646 elif self._match(TokenType.IN): 2647 for_or_in = "IN" 2648 else: 2649 for_or_in = None 2650 2651 if self._match_text_seq("ACCESS"): 2652 lock_type = "ACCESS" 2653 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2654 lock_type = "EXCLUSIVE" 2655 elif self._match_text_seq("SHARE"): 2656 lock_type = "SHARE" 2657 elif self._match_text_seq("READ"): 2658 lock_type = "READ" 2659 elif self._match_text_seq("WRITE"): 2660 lock_type = "WRITE" 2661 elif self._match_text_seq("CHECKSUM"): 2662 lock_type = "CHECKSUM" 2663 else: 2664 lock_type = None 2665 2666 override = self._match_text_seq("OVERRIDE") 2667 2668 return self.expression( 2669 exp.LockingProperty, 2670 this=this, 2671 kind=kind, 2672 for_or_in=for_or_in, 2673 lock_type=lock_type, 2674 override=override, 2675 ) 2676 2677 def _parse_partition_by(self) -> t.List[exp.Expression]: 2678 if self._match(TokenType.PARTITION_BY): 2679 return self._parse_csv(self._parse_assignment) 2680 return [] 2681 2682 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2683 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2684 if self._match_text_seq("MINVALUE"): 2685 return exp.var("MINVALUE") 2686 if self._match_text_seq("MAXVALUE"): 2687 return exp.var("MAXVALUE") 2688 return self._parse_bitwise() 2689 2690 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2691 expression = None 2692 from_expressions = None 2693 to_expressions = None 2694 2695 if self._match(TokenType.IN): 2696 this = self._parse_wrapped_csv(self._parse_bitwise) 2697 elif self._match(TokenType.FROM): 2698 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2699 self._match_text_seq("TO") 2700 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2701 elif self._match_text_seq("WITH", "(", "MODULUS"): 2702 this = self._parse_number() 2703 self._match_text_seq(",", "REMAINDER") 2704 expression = self._parse_number() 2705 self._match_r_paren() 2706 else: 2707 self.raise_error("Failed to parse partition bound spec.") 2708 2709 return self.expression( 2710 exp.PartitionBoundSpec, 2711 this=this, 2712 expression=expression, 2713 from_expressions=from_expressions, 2714 to_expressions=to_expressions, 2715 ) 2716 2717 # https://www.postgresql.org/docs/current/sql-createtable.html 2718 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2719 if not self._match_text_seq("OF"): 2720 self._retreat(self._index - 1) 2721 return None 2722 2723 this = self._parse_table(schema=True) 2724 2725 if self._match(TokenType.DEFAULT): 2726 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2727 elif self._match_text_seq("FOR", "VALUES"): 2728 expression = self._parse_partition_bound_spec() 2729 else: 2730 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2731 2732 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2733 2734 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2735 self._match(TokenType.EQ) 2736 return self.expression( 2737 exp.PartitionedByProperty, 2738 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2739 ) 2740 2741 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2742 if self._match_text_seq("AND", "STATISTICS"): 2743 statistics = True 2744 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2745 statistics = False 2746 else: 2747 statistics = None 2748 2749 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2750 2751 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2752 if self._match_text_seq("SQL"): 2753 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2754 return None 2755 2756 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2757 if self._match_text_seq("SQL", "DATA"): 2758 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2759 return None 2760 2761 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2762 if self._match_text_seq("PRIMARY", "INDEX"): 2763 return exp.NoPrimaryIndexProperty() 2764 if self._match_text_seq("SQL"): 2765 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2766 return None 2767 2768 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2769 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2770 return exp.OnCommitProperty() 2771 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2772 return exp.OnCommitProperty(delete=True) 2773 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2774 2775 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2776 if self._match_text_seq("SQL", "DATA"): 2777 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2778 return None 2779 2780 def _parse_distkey(self) -> exp.DistKeyProperty: 2781 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2782 2783 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2784 table = self._parse_table(schema=True) 2785 2786 options = [] 2787 while self._match_texts(("INCLUDING", "EXCLUDING")): 2788 this = self._prev.text.upper() 2789 2790 id_var = self._parse_id_var() 2791 if not id_var: 2792 return None 2793 2794 options.append( 2795 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2796 ) 2797 2798 return self.expression(exp.LikeProperty, this=table, expressions=options) 2799 2800 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2801 return self.expression( 2802 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2803 ) 2804 2805 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2806 self._match(TokenType.EQ) 2807 return self.expression( 2808 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2809 ) 2810 2811 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2812 self._match_text_seq("WITH", "CONNECTION") 2813 return self.expression( 2814 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2815 ) 2816 2817 def _parse_returns(self) -> exp.ReturnsProperty: 2818 value: t.Optional[exp.Expression] 2819 null = None 2820 is_table = self._match(TokenType.TABLE) 2821 2822 if is_table: 2823 if self._match(TokenType.LT): 2824 value = self.expression( 2825 exp.Schema, 2826 this="TABLE", 2827 expressions=self._parse_csv(self._parse_struct_types), 2828 ) 2829 if not self._match(TokenType.GT): 2830 self.raise_error("Expecting >") 2831 else: 2832 value = self._parse_schema(exp.var("TABLE")) 2833 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2834 null = True 2835 value = None 2836 else: 2837 value = self._parse_types() 2838 2839 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2840 2841 def _parse_describe(self) -> exp.Describe: 2842 kind = self._match_set(self.CREATABLES) and self._prev.text 2843 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2844 if self._match(TokenType.DOT): 2845 style = None 2846 self._retreat(self._index - 2) 2847 2848 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2849 2850 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2851 this = self._parse_statement() 2852 else: 2853 this = self._parse_table(schema=True) 2854 2855 properties = self._parse_properties() 2856 expressions = properties.expressions if properties else None 2857 partition = self._parse_partition() 2858 return self.expression( 2859 exp.Describe, 2860 this=this, 2861 style=style, 2862 kind=kind, 2863 expressions=expressions, 2864 partition=partition, 2865 format=format, 2866 ) 2867 2868 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2869 kind = self._prev.text.upper() 2870 expressions = [] 2871 2872 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2873 if self._match(TokenType.WHEN): 2874 expression = self._parse_disjunction() 2875 self._match(TokenType.THEN) 2876 else: 2877 expression = None 2878 2879 else_ = self._match(TokenType.ELSE) 2880 2881 if not self._match(TokenType.INTO): 2882 return None 2883 2884 return self.expression( 2885 exp.ConditionalInsert, 2886 this=self.expression( 2887 exp.Insert, 2888 this=self._parse_table(schema=True), 2889 expression=self._parse_derived_table_values(), 2890 ), 2891 expression=expression, 2892 else_=else_, 2893 ) 2894 2895 expression = parse_conditional_insert() 2896 while expression is not None: 2897 expressions.append(expression) 2898 expression = parse_conditional_insert() 2899 2900 return self.expression( 2901 exp.MultitableInserts, 2902 kind=kind, 2903 comments=comments, 2904 expressions=expressions, 2905 source=self._parse_table(), 2906 ) 2907 2908 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2909 comments = [] 2910 hint = self._parse_hint() 2911 overwrite = self._match(TokenType.OVERWRITE) 2912 ignore = self._match(TokenType.IGNORE) 2913 local = self._match_text_seq("LOCAL") 2914 alternative = None 2915 is_function = None 2916 2917 if self._match_text_seq("DIRECTORY"): 2918 this: t.Optional[exp.Expression] = self.expression( 2919 exp.Directory, 2920 this=self._parse_var_or_string(), 2921 local=local, 2922 row_format=self._parse_row_format(match_row=True), 2923 ) 2924 else: 2925 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2926 comments += ensure_list(self._prev_comments) 2927 return self._parse_multitable_inserts(comments) 2928 2929 if self._match(TokenType.OR): 2930 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2931 2932 self._match(TokenType.INTO) 2933 comments += ensure_list(self._prev_comments) 2934 self._match(TokenType.TABLE) 2935 is_function = self._match(TokenType.FUNCTION) 2936 2937 this = ( 2938 self._parse_table(schema=True, parse_partition=True) 2939 if not is_function 2940 else self._parse_function() 2941 ) 2942 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2943 this.set("alias", self._parse_table_alias()) 2944 2945 returning = self._parse_returning() 2946 2947 return self.expression( 2948 
exp.Insert, 2949 comments=comments, 2950 hint=hint, 2951 is_function=is_function, 2952 this=this, 2953 stored=self._match_text_seq("STORED") and self._parse_stored(), 2954 by_name=self._match_text_seq("BY", "NAME"), 2955 exists=self._parse_exists(), 2956 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2957 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2958 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2959 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2960 conflict=self._parse_on_conflict(), 2961 returning=returning or self._parse_returning(), 2962 overwrite=overwrite, 2963 alternative=alternative, 2964 ignore=ignore, 2965 source=self._match(TokenType.TABLE) and self._parse_table(), 2966 ) 2967 2968 def _parse_kill(self) -> exp.Kill: 2969 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2970 2971 return self.expression( 2972 exp.Kill, 2973 this=self._parse_primary(), 2974 kind=kind, 2975 ) 2976 2977 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2978 conflict = self._match_text_seq("ON", "CONFLICT") 2979 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2980 2981 if not conflict and not duplicate: 2982 return None 2983 2984 conflict_keys = None 2985 constraint = None 2986 2987 if conflict: 2988 if self._match_text_seq("ON", "CONSTRAINT"): 2989 constraint = self._parse_id_var() 2990 elif self._match(TokenType.L_PAREN): 2991 conflict_keys = self._parse_csv(self._parse_id_var) 2992 self._match_r_paren() 2993 2994 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2995 if self._prev.token_type == TokenType.UPDATE: 2996 self._match(TokenType.SET) 2997 expressions = self._parse_csv(self._parse_equality) 2998 else: 2999 expressions = None 3000 3001 return self.expression( 3002 exp.OnConflict, 3003 duplicate=duplicate, 3004 expressions=expressions, 3005 action=action, 3006 conflict_keys=conflict_keys, 3007 constraint=constraint, 3008 where=self._parse_where(), 3009 ) 3010 3011 def _parse_returning(self) -> t.Optional[exp.Returning]: 3012 if not self._match(TokenType.RETURNING): 3013 return None 3014 return self.expression( 3015 exp.Returning, 3016 expressions=self._parse_csv(self._parse_expression), 3017 into=self._match(TokenType.INTO) and self._parse_table_part(), 3018 ) 3019 3020 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3021 if not self._match(TokenType.FORMAT): 3022 return None 3023 return self._parse_row_format() 3024 3025 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3026 index = self._index 3027 with_ = with_ or self._match_text_seq("WITH") 3028 3029 if not self._match(TokenType.SERDE_PROPERTIES): 3030 self._retreat(index) 3031 return None 3032 return self.expression( 3033 exp.SerdeProperties, 3034 **{ # type: ignore 3035 "expressions": self._parse_wrapped_properties(), 3036 "with": with_, 3037 }, 3038 ) 3039 3040 def _parse_row_format( 3041 self, match_row: bool = False 3042 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3043 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3044 return None 3045 3046 if self._match_text_seq("SERDE"): 3047 this = self._parse_string() 3048 3049 serde_properties = self._parse_serde_properties() 3050 3051 return self.expression( 3052 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3053 ) 3054 3055 self._match_text_seq("DELIMITED") 3056 3057 kwargs = {} 3058 3059 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3060 kwargs["fields"] = self._parse_string() 3061 if self._match_text_seq("ESCAPED", "BY"): 3062 kwargs["escaped"] = self._parse_string() 3063 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3064 kwargs["collection_items"] = self._parse_string() 3065 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3066 kwargs["map_keys"] = self._parse_string() 3067 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3068 kwargs["lines"] = self._parse_string() 3069 if self._match_text_seq("NULL", "DEFINED", "AS"): 3070 kwargs["null"] = self._parse_string() 3071 3072 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3073 3074 def _parse_load(self) -> exp.LoadData | exp.Command: 3075 if self._match_text_seq("DATA"): 3076 local = self._match_text_seq("LOCAL") 3077 self._match_text_seq("INPATH") 3078 inpath = self._parse_string() 3079 overwrite = self._match(TokenType.OVERWRITE) 3080 self._match_pair(TokenType.INTO, TokenType.TABLE) 3081 3082 return self.expression( 3083 exp.LoadData, 3084 this=self._parse_table(schema=True), 3085 local=local, 3086 overwrite=overwrite, 3087 inpath=inpath, 3088 partition=self._parse_partition(), 3089 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3090 serde=self._match_text_seq("SERDE") and self._parse_string(), 3091 ) 3092 return self._parse_as_command(self._prev) 3093 3094 def _parse_delete(self) -> exp.Delete: 3095 # This handles MySQL's "Multiple-Table Syntax" 3096 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3097 tables = None 3098 if not self._match(TokenType.FROM, advance=False): 3099 tables = self._parse_csv(self._parse_table) or None 3100 3101 returning = self._parse_returning() 3102 3103 return self.expression( 3104 exp.Delete, 3105 tables=tables, 3106 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3107 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3108 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3109 where=self._parse_where(), 3110 returning=returning or self._parse_returning(), 3111 limit=self._parse_limit(), 3112 ) 3113 3114 def _parse_update(self) -> exp.Update: 3115 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3116 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3117 returning = self._parse_returning() 3118 return self.expression( 3119 exp.Update, 3120 **{ # type: ignore 3121 "this": this, 3122 "expressions": expressions, 3123 "from": self._parse_from(joins=True), 3124 "where": self._parse_where(), 3125 "returning": returning or self._parse_returning(), 3126 "order": self._parse_order(), 3127 "limit": self._parse_limit(), 3128 }, 3129 ) 3130 3131 def _parse_use(self) -> exp.Use: 3132 return self.expression( 3133 exp.Use, 3134 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3135 this=self._parse_table(schema=False), 3136 ) 3137 3138 def _parse_uncache(self) -> exp.Uncache: 3139 if not self._match(TokenType.TABLE): 3140 self.raise_error("Expecting TABLE after UNCACHE") 3141 3142 return self.expression( 3143 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3144 ) 3145 3146 def _parse_cache(self) -> exp.Cache: 3147 lazy = self._match_text_seq("LAZY") 3148 self._match(TokenType.TABLE) 3149 table = 
self._parse_table(schema=True) 3150 3151 options = [] 3152 if self._match_text_seq("OPTIONS"): 3153 self._match_l_paren() 3154 k = self._parse_string() 3155 self._match(TokenType.EQ) 3156 v = self._parse_string() 3157 options = [k, v] 3158 self._match_r_paren() 3159 3160 self._match(TokenType.ALIAS) 3161 return self.expression( 3162 exp.Cache, 3163 this=table, 3164 lazy=lazy, 3165 options=options, 3166 expression=self._parse_select(nested=True), 3167 ) 3168 3169 def _parse_partition(self) -> t.Optional[exp.Partition]: 3170 if not self._match_texts(self.PARTITION_KEYWORDS): 3171 return None 3172 3173 return self.expression( 3174 exp.Partition, 3175 subpartition=self._prev.text.upper() == "SUBPARTITION", 3176 expressions=self._parse_wrapped_csv(self._parse_assignment), 3177 ) 3178 3179 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3180 def _parse_value_expression() -> t.Optional[exp.Expression]: 3181 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3182 return exp.var(self._prev.text.upper()) 3183 return self._parse_expression() 3184 3185 if self._match(TokenType.L_PAREN): 3186 expressions = self._parse_csv(_parse_value_expression) 3187 self._match_r_paren() 3188 return self.expression(exp.Tuple, expressions=expressions) 3189 3190 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3191 expression = self._parse_expression() 3192 if expression: 3193 return self.expression(exp.Tuple, expressions=[expression]) 3194 return None 3195 3196 def _parse_projections(self) -> t.List[exp.Expression]: 3197 return self._parse_expressions() 3198 3199 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3200 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3201 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3202 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3203 ) 3204 elif self._match(TokenType.FROM): 3205 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3206 # Support parentheses for duckdb FROM-first syntax 3207 select = self._parse_select() 3208 if select: 3209 select.set("from", from_) 3210 this = select 3211 else: 3212 this = exp.select("*").from_(t.cast(exp.From, from_)) 3213 else: 3214 this = ( 3215 self._parse_table(consume_pipe=True) 3216 if table 3217 else self._parse_select(nested=True, parse_set_operation=False) 3218 ) 3219 3220 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3221 # in case a modifier (e.g. 
join) is following 3222 if table and isinstance(this, exp.Values) and this.alias: 3223 alias = this.args["alias"].pop() 3224 this = exp.Table(this=this, alias=alias) 3225 3226 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3227 3228 return this 3229 3230 def _parse_select( 3231 self, 3232 nested: bool = False, 3233 table: bool = False, 3234 parse_subquery_alias: bool = True, 3235 parse_set_operation: bool = True, 3236 consume_pipe: bool = True, 3237 ) -> t.Optional[exp.Expression]: 3238 query = self._parse_select_query( 3239 nested=nested, 3240 table=table, 3241 parse_subquery_alias=parse_subquery_alias, 3242 parse_set_operation=parse_set_operation, 3243 ) 3244 3245 if ( 3246 consume_pipe 3247 and self._match(TokenType.PIPE_GT, advance=False) 3248 and isinstance(query, exp.Query) 3249 ): 3250 query = self._parse_pipe_syntax_query(query) 3251 query = query.subquery(copy=False) if query and table else query 3252 3253 return query 3254 3255 def _parse_select_query( 3256 self, 3257 nested: bool = False, 3258 table: bool = False, 3259 parse_subquery_alias: bool = True, 3260 parse_set_operation: bool = True, 3261 ) -> t.Optional[exp.Expression]: 3262 cte = self._parse_with() 3263 3264 if cte: 3265 this = self._parse_statement() 3266 3267 if not this: 3268 self.raise_error("Failed to parse any statement following CTE") 3269 return cte 3270 3271 if "with" in this.arg_types: 3272 this.set("with", cte) 3273 else: 3274 self.raise_error(f"{this.key} does not support CTE") 3275 this = cte 3276 3277 return this 3278 3279 # duckdb supports leading with FROM x 3280 from_ = ( 3281 self._parse_from(consume_pipe=True) 3282 if self._match(TokenType.FROM, advance=False) 3283 else None 3284 ) 3285 3286 if self._match(TokenType.SELECT): 3287 comments = self._prev_comments 3288 3289 hint = self._parse_hint() 3290 3291 if self._next and not self._next.token_type == TokenType.DOT: 3292 all_ = self._match(TokenType.ALL) 3293 distinct = self._match_set(self.DISTINCT_TOKENS) 3294 else: 3295 all_, distinct = None, None 3296 3297 kind = ( 3298 self._match(TokenType.ALIAS) 3299 and self._match_texts(("STRUCT", "VALUE")) 3300 and self._prev.text.upper() 3301 ) 3302 3303 if distinct: 3304 distinct = self.expression( 3305 exp.Distinct, 3306 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3307 ) 3308 3309 if all_ and distinct: 3310 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3311 3312 operation_modifiers = [] 3313 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3314 operation_modifiers.append(exp.var(self._prev.text.upper())) 3315 3316 limit = self._parse_limit(top=True) 3317 projections = self._parse_projections() 3318 3319 this = self.expression( 3320 exp.Select, 3321 kind=kind, 3322 hint=hint, 3323 distinct=distinct, 3324 expressions=projections, 3325 limit=limit, 3326 operation_modifiers=operation_modifiers or None, 3327 ) 3328 this.comments = comments 3329 3330 into = self._parse_into() 3331 if into: 3332 this.set("into", into) 3333 3334 if not from_: 3335 from_ = self._parse_from() 3336 3337 if from_: 3338 this.set("from", from_) 3339 3340 this = self._parse_query_modifiers(this) 3341 elif (table or nested) and self._match(TokenType.L_PAREN): 3342 this = self._parse_wrapped_select(table=table) 3343 3344 # We return early here so that the UNION isn't attached to the subquery by the 3345 # following call to _parse_set_operations, but instead becomes the parent node 3346 self._match_r_paren() 3347 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3348 elif self._match(TokenType.VALUES, advance=False): 3349 this = self._parse_derived_table_values() 3350 elif from_: 3351 this = exp.select("*").from_(from_.this, copy=False) 3352 elif self._match(TokenType.SUMMARIZE): 3353 table = self._match(TokenType.TABLE) 3354 this = self._parse_select() or self._parse_string() or self._parse_table() 3355 return self.expression(exp.Summarize, this=this, table=table) 3356 elif self._match(TokenType.DESCRIBE): 3357 this = self._parse_describe() 3358 elif self._match_text_seq("STREAM"): 3359 this = self._parse_function() 3360 if this: 3361 this = self.expression(exp.Stream, this=this) 3362 else: 3363 self._retreat(self._index - 1) 3364 else: 3365 this = None 3366 3367 return self._parse_set_operations(this) if parse_set_operation else this 3368 3369 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3370 self._match_text_seq("SEARCH") 3371 3372 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3373 3374 if not kind: 3375 return None 3376 3377 self._match_text_seq("FIRST", "BY") 3378 3379 return self.expression( 3380 exp.RecursiveWithSearch, 3381 kind=kind, 3382 this=self._parse_id_var(), 3383 expression=self._match_text_seq("SET") and self._parse_id_var(), 3384 using=self._match_text_seq("USING") and self._parse_id_var(), 3385 ) 3386 3387 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3388 if not skip_with_token and not self._match(TokenType.WITH): 3389 return None 3390 3391 comments = self._prev_comments 3392 recursive = self._match(TokenType.RECURSIVE) 3393 3394 last_comments = None 3395 expressions = [] 3396 while True: 3397 cte = self._parse_cte() 3398 if isinstance(cte, exp.CTE): 3399 expressions.append(cte) 3400 if last_comments: 3401 cte.add_comments(last_comments) 3402 3403 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3404 break 3405 else: 3406 self._match(TokenType.WITH) 3407 3408 last_comments = self._prev_comments 3409 3410 return self.expression( 3411 exp.With, 3412 comments=comments, 3413 expressions=expressions, 3414 recursive=recursive, 3415 search=self._parse_recursive_with_search(), 3416 ) 3417 3418 def _parse_cte(self) -> t.Optional[exp.CTE]: 3419 index = self._index 3420 3421 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3422 if not alias or not alias.this: 3423 self.raise_error("Expected CTE to have alias") 3424 3425 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3426 self._retreat(index) 3427 return None 3428 3429 comments = self._prev_comments 3430 3431 if self._match_text_seq("NOT", "MATERIALIZED"): 3432 materialized = False 3433 elif self._match_text_seq("MATERIALIZED"): 3434 materialized = True 3435 else: 3436 materialized = None 3437 3438 cte = self.expression( 3439 exp.CTE, 3440 this=self._parse_wrapped(self._parse_statement), 3441 alias=alias, 3442 materialized=materialized, 3443 comments=comments, 3444 ) 3445 3446 values = cte.this 3447 if isinstance(values, exp.Values): 3448 if values.alias: 3449 cte.set("this", exp.select("*").from_(values)) 3450 else: 3451 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3452 3453 return cte 3454 3455 def _parse_table_alias( 3456 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3457 ) -> t.Optional[exp.TableAlias]: 3458 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3459 # so this section tries to parse the clause 
version and if it fails, it treats the token 3460 # as an identifier (alias) 3461 if self._can_parse_limit_or_offset(): 3462 return None 3463 3464 any_token = self._match(TokenType.ALIAS) 3465 alias = ( 3466 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3467 or self._parse_string_as_identifier() 3468 ) 3469 3470 index = self._index 3471 if self._match(TokenType.L_PAREN): 3472 columns = self._parse_csv(self._parse_function_parameter) 3473 self._match_r_paren() if columns else self._retreat(index) 3474 else: 3475 columns = None 3476 3477 if not alias and not columns: 3478 return None 3479 3480 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3481 3482 # We bubble up comments from the Identifier to the TableAlias 3483 if isinstance(alias, exp.Identifier): 3484 table_alias.add_comments(alias.pop_comments()) 3485 3486 return table_alias 3487 3488 def _parse_subquery( 3489 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3490 ) -> t.Optional[exp.Subquery]: 3491 if not this: 3492 return None 3493 3494 return self.expression( 3495 exp.Subquery, 3496 this=this, 3497 pivots=self._parse_pivots(), 3498 alias=self._parse_table_alias() if parse_alias else None, 3499 sample=self._parse_table_sample(), 3500 ) 3501 3502 def _implicit_unnests_to_explicit(self, this: E) -> E: 3503 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3504 3505 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3506 for i, join in enumerate(this.args.get("joins") or []): 3507 table = join.this 3508 normalized_table = table.copy() 3509 normalized_table.meta["maybe_column"] = True 3510 normalized_table = _norm(normalized_table, dialect=self.dialect) 3511 3512 if isinstance(table, exp.Table) and not join.args.get("on"): 3513 if normalized_table.parts[0].name in refs: 3514 table_as_column = table.to_column() 3515 unnest = exp.Unnest(expressions=[table_as_column]) 3516 3517 # Table.to_column creates a parent Alias node that we want to convert to 3518 # a TableAlias and attach to the Unnest, so it matches the parser's output 3519 if isinstance(table.args.get("alias"), exp.TableAlias): 3520 table_as_column.replace(table_as_column.this) 3521 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3522 3523 table.replace(unnest) 3524 3525 refs.add(normalized_table.alias_or_name) 3526 3527 return this 3528 3529 def _parse_query_modifiers( 3530 self, this: t.Optional[exp.Expression] 3531 ) -> t.Optional[exp.Expression]: 3532 if isinstance(this, self.MODIFIABLES): 3533 for join in self._parse_joins(): 3534 this.append("joins", join) 3535 for lateral in iter(self._parse_lateral, None): 3536 this.append("laterals", lateral) 3537 3538 while True: 3539 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3540 modifier_token = self._curr 3541 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3542 key, expression = parser(self) 3543 3544 if expression: 3545 if this.args.get(key): 3546 self.raise_error( 3547 f"Found multiple '{modifier_token.text.upper()}' clauses", 3548 token=modifier_token, 3549 ) 3550 3551 this.set(key, expression) 3552 if key == "limit": 3553 offset = expression.args.pop("offset", None) 3554 3555 if offset: 3556 offset = exp.Offset(expression=offset) 3557 this.set("offset", offset) 3558 3559 limit_by_expressions = expression.expressions 3560 expression.set("expressions", None) 3561 offset.set("expressions", limit_by_expressions) 3562 continue 
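# No recognized modifier at the current token (or its parser produced nothing), so stop collecting query modifiers.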
3563 break 3564 3565 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3566 this = self._implicit_unnests_to_explicit(this) 3567 3568 return this 3569 3570 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3571 start = self._curr 3572 while self._curr: 3573 self._advance() 3574 3575 end = self._tokens[self._index - 1] 3576 return exp.Hint(expressions=[self._find_sql(start, end)]) 3577 3578 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3579 return self._parse_function_call() 3580 3581 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3582 start_index = self._index 3583 should_fallback_to_string = False 3584 3585 hints = [] 3586 try: 3587 for hint in iter( 3588 lambda: self._parse_csv( 3589 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3590 ), 3591 [], 3592 ): 3593 hints.extend(hint) 3594 except ParseError: 3595 should_fallback_to_string = True 3596 3597 if should_fallback_to_string or self._curr: 3598 self._retreat(start_index) 3599 return self._parse_hint_fallback_to_string() 3600 3601 return self.expression(exp.Hint, expressions=hints) 3602 3603 def _parse_hint(self) -> t.Optional[exp.Hint]: 3604 if self._match(TokenType.HINT) and self._prev_comments: 3605 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3606 3607 return None 3608 3609 def _parse_into(self) -> t.Optional[exp.Into]: 3610 if not self._match(TokenType.INTO): 3611 return None 3612 3613 temp = self._match(TokenType.TEMPORARY) 3614 unlogged = self._match_text_seq("UNLOGGED") 3615 self._match(TokenType.TABLE) 3616 3617 return self.expression( 3618 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3619 ) 3620 3621 def _parse_from( 3622 self, 3623 joins: bool = False, 3624 skip_from_token: bool = False, 3625 consume_pipe: bool = False, 3626 ) -> t.Optional[exp.From]: 3627 if not skip_from_token and not self._match(TokenType.FROM): 3628 return None 3629 3630 return self.expression( 3631 exp.From, 3632 comments=self._prev_comments, 3633 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3634 ) 3635 3636 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3637 return self.expression( 3638 exp.MatchRecognizeMeasure, 3639 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3640 this=self._parse_expression(), 3641 ) 3642 3643 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3644 if not self._match(TokenType.MATCH_RECOGNIZE): 3645 return None 3646 3647 self._match_l_paren() 3648 3649 partition = self._parse_partition_by() 3650 order = self._parse_order() 3651 3652 measures = ( 3653 self._parse_csv(self._parse_match_recognize_measure) 3654 if self._match_text_seq("MEASURES") 3655 else None 3656 ) 3657 3658 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3659 rows = exp.var("ONE ROW PER MATCH") 3660 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3661 text = "ALL ROWS PER MATCH" 3662 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3663 text += " SHOW EMPTY MATCHES" 3664 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3665 text += " OMIT EMPTY MATCHES" 3666 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3667 text += " WITH UNMATCHED ROWS" 3668 rows = exp.var(text) 3669 else: 3670 rows = None 3671 3672 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3673 text = "AFTER MATCH SKIP" 3674 if self._match_text_seq("PAST", "LAST", "ROW"): 3675 text += " PAST LAST ROW" 3676 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3677 text += " TO NEXT ROW" 3678 elif self._match_text_seq("TO", "FIRST"): 3679 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3680 elif self._match_text_seq("TO", "LAST"): 3681 text += f" TO LAST {self._advance_any().text}" # type: ignore 3682 after = exp.var(text) 3683 else: 3684 after = None 3685 3686 if self._match_text_seq("PATTERN"): 3687 self._match_l_paren() 3688 3689 if not self._curr: 3690 self.raise_error("Expecting )", self._curr) 3691 3692 paren = 1 3693 start = self._curr 3694 3695 while self._curr and paren > 0: 3696 if self._curr.token_type == TokenType.L_PAREN: 3697 paren += 1 3698 if self._curr.token_type == TokenType.R_PAREN: 3699 paren -= 1 3700 3701 end = self._prev 3702 self._advance() 3703 3704 if paren > 0: 3705 self.raise_error("Expecting )", self._curr) 3706 3707 pattern = exp.var(self._find_sql(start, end)) 3708 else: 3709 pattern = None 3710 3711 define = ( 3712 self._parse_csv(self._parse_name_as_expression) 3713 if self._match_text_seq("DEFINE") 3714 else None 3715 ) 3716 3717 self._match_r_paren() 3718 3719 return self.expression( 3720 exp.MatchRecognize, 3721 partition_by=partition, 3722 order=order, 3723 measures=measures, 3724 rows=rows, 3725 after=after, 3726 pattern=pattern, 3727 define=define, 3728 alias=self._parse_table_alias(), 3729 ) 3730 3731 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3732 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3733 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3734 cross_apply = False 3735 3736 if cross_apply is not None: 3737 this = self._parse_select(table=True) 3738 view = None 3739 outer = None 3740 elif self._match(TokenType.LATERAL): 3741 this = self._parse_select(table=True) 3742 view = self._match(TokenType.VIEW) 3743 outer = self._match(TokenType.OUTER) 3744 else: 3745 return None 3746 3747 if not this: 3748 this = ( 3749 self._parse_unnest() 3750 or self._parse_function() 3751 or self._parse_id_var(any_token=False) 3752 ) 3753 3754 while self._match(TokenType.DOT): 3755 this = exp.Dot( 3756 this=this, 3757 expression=self._parse_function() or self._parse_id_var(any_token=False), 3758 ) 3759 3760 ordinality: t.Optional[bool] = None 3761 3762 if view: 3763 table = self._parse_id_var(any_token=False) 3764 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3765 table_alias: t.Optional[exp.TableAlias] = self.expression( 3766 exp.TableAlias, this=table, columns=columns 3767 ) 3768 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3769 # We move the alias from the lateral's child node to the lateral itself 3770 table_alias = this.args["alias"].pop() 3771 else: 3772 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3773 table_alias = self._parse_table_alias() 3774 3775 return self.expression( 3776 exp.Lateral, 3777 this=this, 3778 view=view, 3779 outer=outer, 3780 alias=table_alias, 3781 cross_apply=cross_apply, 3782 ordinality=ordinality, 3783 ) 3784 3785 def _parse_join_parts( 3786 self, 3787 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3788 return ( 3789 self._match_set(self.JOIN_METHODS) and self._prev, 3790 self._match_set(self.JOIN_SIDES) and self._prev, 3791 self._match_set(self.JOIN_KINDS) and self._prev, 3792 ) 3793 3794 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3795 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3796 this = self._parse_column() 3797 if 
isinstance(this, exp.Column): 3798 return this.this 3799 return this 3800 3801 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3802 3803 def _parse_join( 3804 self, skip_join_token: bool = False, parse_bracket: bool = False 3805 ) -> t.Optional[exp.Join]: 3806 if self._match(TokenType.COMMA): 3807 table = self._try_parse(self._parse_table) 3808 cross_join = self.expression(exp.Join, this=table) if table else None 3809 3810 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3811 cross_join.set("kind", "CROSS") 3812 3813 return cross_join 3814 3815 index = self._index 3816 method, side, kind = self._parse_join_parts() 3817 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3818 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3819 join_comments = self._prev_comments 3820 3821 if not skip_join_token and not join: 3822 self._retreat(index) 3823 kind = None 3824 method = None 3825 side = None 3826 3827 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3828 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3829 3830 if not skip_join_token and not join and not outer_apply and not cross_apply: 3831 return None 3832 3833 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3834 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3835 kwargs["expressions"] = self._parse_csv( 3836 lambda: self._parse_table(parse_bracket=parse_bracket) 3837 ) 3838 3839 if method: 3840 kwargs["method"] = method.text 3841 if side: 3842 kwargs["side"] = side.text 3843 if kind: 3844 kwargs["kind"] = kind.text 3845 if hint: 3846 kwargs["hint"] = hint 3847 3848 if self._match(TokenType.MATCH_CONDITION): 3849 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3850 3851 if self._match(TokenType.ON): 3852 kwargs["on"] = self._parse_assignment() 3853 elif self._match(TokenType.USING): 3854 kwargs["using"] = self._parse_using_identifiers() 3855 elif ( 3856 not method 3857 and not (outer_apply or cross_apply) 3858 and not isinstance(kwargs["this"], exp.Unnest) 3859 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3860 ): 3861 index = self._index 3862 joins: t.Optional[list] = list(self._parse_joins()) 3863 3864 if joins and self._match(TokenType.ON): 3865 kwargs["on"] = self._parse_assignment() 3866 elif joins and self._match(TokenType.USING): 3867 kwargs["using"] = self._parse_using_identifiers() 3868 else: 3869 joins = None 3870 self._retreat(index) 3871 3872 kwargs["this"].set("joins", joins if joins else None) 3873 3874 kwargs["pivots"] = self._parse_pivots() 3875 3876 comments = [c for token in (method, side, kind) if token for c in token.comments] 3877 comments = (join_comments or []) + comments 3878 return self.expression(exp.Join, comments=comments, **kwargs) 3879 3880 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3881 this = self._parse_assignment() 3882 3883 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3884 return this 3885 3886 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3887 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3888 3889 return this 3890 3891 def _parse_index_params(self) -> exp.IndexParameters: 3892 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3893 3894 if self._match(TokenType.L_PAREN, advance=False): 3895 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3896 else: 3897 columns = None 3898 3899 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3900 partition_by = self._parse_partition_by() 3901 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3902 tablespace = ( 3903 self._parse_var(any_token=True) 3904 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3905 else None 3906 ) 3907 where = self._parse_where() 3908 3909 on = self._parse_field() if self._match(TokenType.ON) else None 3910 3911 return self.expression( 3912 exp.IndexParameters, 3913 using=using, 3914 columns=columns, 3915 include=include, 3916 partition_by=partition_by, 3917 where=where, 3918 with_storage=with_storage, 3919 tablespace=tablespace, 3920 on=on, 3921 ) 3922 3923 def _parse_index( 3924 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3925 ) -> t.Optional[exp.Index]: 3926 if index or anonymous: 3927 unique = None 3928 primary = None 3929 amp = None 3930 3931 self._match(TokenType.ON) 3932 self._match(TokenType.TABLE) # hive 3933 table = self._parse_table_parts(schema=True) 3934 else: 3935 unique = self._match(TokenType.UNIQUE) 3936 primary = self._match_text_seq("PRIMARY") 3937 amp = self._match_text_seq("AMP") 3938 3939 if not self._match(TokenType.INDEX): 3940 return None 3941 3942 index = self._parse_id_var() 3943 table = None 3944 3945 params = self._parse_index_params() 3946 3947 return self.expression( 3948 exp.Index, 3949 this=index, 3950 table=table, 3951 unique=unique, 3952 primary=primary, 3953 amp=amp, 3954 params=params, 3955 ) 3956 3957 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3958 hints: t.List[exp.Expression] = [] 3959 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3960 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3961 hints.append( 3962 self.expression( 3963 exp.WithTableHint, 3964 expressions=self._parse_csv( 3965 lambda: self._parse_function() or self._parse_var(any_token=True) 3966 ), 3967 ) 3968 ) 3969 self._match_r_paren() 3970 else: 3971 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3972 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3973 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3974 3975 self._match_set((TokenType.INDEX, TokenType.KEY)) 3976 if self._match(TokenType.FOR): 3977 hint.set("target", self._advance_any() and self._prev.text.upper()) 3978 3979 hint.set("expressions", self._parse_wrapped_id_vars()) 3980 hints.append(hint) 3981 3982 return hints or None 3983 3984 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3985 return ( 3986 (not schema and self._parse_function(optional_parens=False)) 3987 or self._parse_id_var(any_token=False) 3988 or self._parse_string_as_identifier() 3989 or self._parse_placeholder() 3990 ) 3991 3992 def _parse_table_parts( 3993 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3994 ) -> exp.Table: 3995 catalog = None 3996 db = None 3997 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3998 3999 while self._match(TokenType.DOT): 4000 if catalog: 4001 # This allows nesting the table in arbitrarily many dot expressions if needed 4002 table = self.expression( 4003 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4004 ) 4005 else: 4006 catalog = db 4007 db = table 4008 # "" used for tsql FROM a..b case 4009 table = 
self._parse_table_part(schema=schema) or "" 4010 4011 if ( 4012 wildcard 4013 and self._is_connected() 4014 and (isinstance(table, exp.Identifier) or not table) 4015 and self._match(TokenType.STAR) 4016 ): 4017 if isinstance(table, exp.Identifier): 4018 table.args["this"] += "*" 4019 else: 4020 table = exp.Identifier(this="*") 4021 4022 # We bubble up comments from the Identifier to the Table 4023 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4024 4025 if is_db_reference: 4026 catalog = db 4027 db = table 4028 table = None 4029 4030 if not table and not is_db_reference: 4031 self.raise_error(f"Expected table name but got {self._curr}") 4032 if not db and is_db_reference: 4033 self.raise_error(f"Expected database name but got {self._curr}") 4034 4035 table = self.expression( 4036 exp.Table, 4037 comments=comments, 4038 this=table, 4039 db=db, 4040 catalog=catalog, 4041 ) 4042 4043 changes = self._parse_changes() 4044 if changes: 4045 table.set("changes", changes) 4046 4047 at_before = self._parse_historical_data() 4048 if at_before: 4049 table.set("when", at_before) 4050 4051 pivots = self._parse_pivots() 4052 if pivots: 4053 table.set("pivots", pivots) 4054 4055 return table 4056 4057 def _parse_table( 4058 self, 4059 schema: bool = False, 4060 joins: bool = False, 4061 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4062 parse_bracket: bool = False, 4063 is_db_reference: bool = False, 4064 parse_partition: bool = False, 4065 consume_pipe: bool = False, 4066 ) -> t.Optional[exp.Expression]: 4067 lateral = self._parse_lateral() 4068 if lateral: 4069 return lateral 4070 4071 unnest = self._parse_unnest() 4072 if unnest: 4073 return unnest 4074 4075 values = self._parse_derived_table_values() 4076 if values: 4077 return values 4078 4079 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4080 if subquery: 4081 if not subquery.args.get("pivots"): 4082 subquery.set("pivots", self._parse_pivots()) 4083 return subquery 4084 4085 bracket = parse_bracket and self._parse_bracket(None) 4086 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4087 4088 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4089 self._parse_table 4090 ) 4091 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4092 4093 only = self._match(TokenType.ONLY) 4094 4095 this = t.cast( 4096 exp.Expression, 4097 bracket 4098 or rows_from 4099 or self._parse_bracket( 4100 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4101 ), 4102 ) 4103 4104 if only: 4105 this.set("only", only) 4106 4107 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4108 self._match_text_seq("*") 4109 4110 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4111 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4112 this.set("partition", self._parse_partition()) 4113 4114 if schema: 4115 return self._parse_schema(this=this) 4116 4117 version = self._parse_version() 4118 4119 if version: 4120 this.set("version", version) 4121 4122 if self.dialect.ALIAS_POST_TABLESAMPLE: 4123 this.set("sample", self._parse_table_sample()) 4124 4125 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4126 if alias: 4127 this.set("alias", alias) 4128 4129 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4130 return self.expression( 4131 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4132 ) 4133 4134 this.set("hints", self._parse_table_hints()) 4135 4136 if not this.args.get("pivots"): 4137 this.set("pivots", self._parse_pivots()) 4138 4139 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4140 this.set("sample", self._parse_table_sample()) 4141 4142 if joins: 4143 for join in self._parse_joins(): 4144 this.append("joins", join) 4145 4146 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4147 this.set("ordinality", True) 4148 this.set("alias", self._parse_table_alias()) 4149 4150 return this 4151 4152 def _parse_version(self) -> t.Optional[exp.Version]: 4153 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4154 this = "TIMESTAMP" 4155 elif self._match(TokenType.VERSION_SNAPSHOT): 4156 this = "VERSION" 4157 else: 4158 return None 4159 4160 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4161 kind = self._prev.text.upper() 4162 start = self._parse_bitwise() 4163 self._match_texts(("TO", "AND")) 4164 end = self._parse_bitwise() 4165 expression: t.Optional[exp.Expression] = self.expression( 4166 exp.Tuple, expressions=[start, end] 4167 ) 4168 elif self._match_text_seq("CONTAINED", "IN"): 4169 kind = "CONTAINED IN" 4170 expression = self.expression( 4171 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4172 ) 4173 elif self._match(TokenType.ALL): 4174 kind = "ALL" 4175 expression = None 4176 else: 4177 self._match_text_seq("AS", "OF") 4178 kind = "AS OF" 4179 expression = self._parse_type() 4180 4181 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4182 4183 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4184 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4185 index = self._index 4186 historical_data = None 4187 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4188 this = self._prev.text.upper() 4189 kind = ( 4190 self._match(TokenType.L_PAREN) 4191 and self._match_texts(self.HISTORICAL_DATA_KIND) 4192 and self._prev.text.upper() 4193 ) 4194 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4195 4196 if expression: 4197 self._match_r_paren() 4198 historical_data = self.expression( 4199 exp.HistoricalData, this=this, kind=kind, expression=expression 4200 ) 4201 else: 4202 self._retreat(index) 4203 4204 return historical_data 4205 4206 def _parse_changes(self) -> t.Optional[exp.Changes]: 4207 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4208 return None 4209 4210 information = self._parse_var(any_token=True) 4211 self._match_r_paren() 4212 4213 return self.expression( 4214 exp.Changes, 4215 information=information, 4216 at_before=self._parse_historical_data(), 4217 end=self._parse_historical_data(), 4218 ) 4219 4220 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4221 if not self._match(TokenType.UNNEST): 4222 return None 4223 4224 expressions = self._parse_wrapped_csv(self._parse_equality) 4225 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4226 4227 alias = self._parse_table_alias() if with_alias else None 4228 4229 if alias: 4230 if self.dialect.UNNEST_COLUMN_ONLY: 4231 if alias.args.get("columns"): 4232 self.raise_error("Unexpected extra column alias in unnest.") 4233 4234 alias.set("columns", [alias.this]) 4235 alias.set("this", None) 4236 4237 columns = alias.args.get("columns") or [] 4238 if offset and len(expressions) < len(columns): 4239 offset = columns.pop() 4240 4241 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4242 
self._match(TokenType.ALIAS) 4243 offset = self._parse_id_var( 4244 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4245 ) or exp.to_identifier("offset") 4246 4247 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4248 4249 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4250 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4251 if not is_derived and not ( 4252 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4253 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4254 ): 4255 return None 4256 4257 expressions = self._parse_csv(self._parse_value) 4258 alias = self._parse_table_alias() 4259 4260 if is_derived: 4261 self._match_r_paren() 4262 4263 return self.expression( 4264 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4265 ) 4266 4267 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4268 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4269 as_modifier and self._match_text_seq("USING", "SAMPLE") 4270 ): 4271 return None 4272 4273 bucket_numerator = None 4274 bucket_denominator = None 4275 bucket_field = None 4276 percent = None 4277 size = None 4278 seed = None 4279 4280 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4281 matched_l_paren = self._match(TokenType.L_PAREN) 4282 4283 if self.TABLESAMPLE_CSV: 4284 num = None 4285 expressions = self._parse_csv(self._parse_primary) 4286 else: 4287 expressions = None 4288 num = ( 4289 self._parse_factor() 4290 if self._match(TokenType.NUMBER, advance=False) 4291 else self._parse_primary() or self._parse_placeholder() 4292 ) 4293 4294 if self._match_text_seq("BUCKET"): 4295 bucket_numerator = self._parse_number() 4296 self._match_text_seq("OUT", "OF") 4297 bucket_denominator = bucket_denominator = self._parse_number() 4298 self._match(TokenType.ON) 4299 bucket_field = self._parse_field() 4300 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4301 percent = num 4302 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4303 size = num 4304 else: 4305 percent = num 4306 4307 if matched_l_paren: 4308 self._match_r_paren() 4309 4310 if self._match(TokenType.L_PAREN): 4311 method = self._parse_var(upper=True) 4312 seed = self._match(TokenType.COMMA) and self._parse_number() 4313 self._match_r_paren() 4314 elif self._match_texts(("SEED", "REPEATABLE")): 4315 seed = self._parse_wrapped(self._parse_number) 4316 4317 if not method and self.DEFAULT_SAMPLING_METHOD: 4318 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4319 4320 return self.expression( 4321 exp.TableSample, 4322 expressions=expressions, 4323 method=method, 4324 bucket_numerator=bucket_numerator, 4325 bucket_denominator=bucket_denominator, 4326 bucket_field=bucket_field, 4327 percent=percent, 4328 size=size, 4329 seed=seed, 4330 ) 4331 4332 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4333 return list(iter(self._parse_pivot, None)) or None 4334 4335 def _parse_joins(self) -> t.Iterator[exp.Join]: 4336 return iter(self._parse_join, None) 4337 4338 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4339 if not self._match(TokenType.INTO): 4340 return None 4341 4342 return self.expression( 4343 exp.UnpivotColumns, 4344 this=self._match_text_seq("NAME") and self._parse_column(), 4345 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4346 ) 4347 4348 # https://duckdb.org/docs/sql/statements/pivot 4349 
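# For illustration, the simplified form handled below is DuckDB's statement-level pivot, e.g.
#   PIVOT cities ON year USING SUM(population) GROUP BY country
#   UNPIVOT monthly_sales ON jan, feb, mar INTO NAME month VALUE sales
# An assumed usage sketch (requires sqlglot to be installed; table and column names are hypothetical):
#   import sqlglot
#   sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")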
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4350 def _parse_on() -> t.Optional[exp.Expression]: 4351 this = self._parse_bitwise() 4352 4353 if self._match(TokenType.IN): 4354 # PIVOT ... ON col IN (row_val1, row_val2) 4355 return self._parse_in(this) 4356 if self._match(TokenType.ALIAS, advance=False): 4357 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4358 return self._parse_alias(this) 4359 4360 return this 4361 4362 this = self._parse_table() 4363 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4364 into = self._parse_unpivot_columns() 4365 using = self._match(TokenType.USING) and self._parse_csv( 4366 lambda: self._parse_alias(self._parse_function()) 4367 ) 4368 group = self._parse_group() 4369 4370 return self.expression( 4371 exp.Pivot, 4372 this=this, 4373 expressions=expressions, 4374 using=using, 4375 group=group, 4376 unpivot=is_unpivot, 4377 into=into, 4378 ) 4379 4380 def _parse_pivot_in(self) -> exp.In: 4381 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4382 this = self._parse_select_or_expression() 4383 4384 self._match(TokenType.ALIAS) 4385 alias = self._parse_bitwise() 4386 if alias: 4387 if isinstance(alias, exp.Column) and not alias.db: 4388 alias = alias.this 4389 return self.expression(exp.PivotAlias, this=this, alias=alias) 4390 4391 return this 4392 4393 value = self._parse_column() 4394 4395 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4396 self.raise_error("Expecting IN (") 4397 4398 if self._match(TokenType.ANY): 4399 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4400 else: 4401 exprs = self._parse_csv(_parse_aliased_expression) 4402 4403 self._match_r_paren() 4404 return self.expression(exp.In, this=value, expressions=exprs) 4405 4406 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4407 func = self._parse_function() 4408 if not func: 4409 self.raise_error("Expecting an aggregation function in PIVOT") 4410 4411 return self._parse_alias(func) 4412 4413 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4414 index = self._index 4415 include_nulls = None 4416 4417 if self._match(TokenType.PIVOT): 4418 unpivot = False 4419 elif self._match(TokenType.UNPIVOT): 4420 unpivot = True 4421 4422 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4423 if self._match_text_seq("INCLUDE", "NULLS"): 4424 include_nulls = True 4425 elif self._match_text_seq("EXCLUDE", "NULLS"): 4426 include_nulls = False 4427 else: 4428 return None 4429 4430 expressions = [] 4431 4432 if not self._match(TokenType.L_PAREN): 4433 self._retreat(index) 4434 return None 4435 4436 if unpivot: 4437 expressions = self._parse_csv(self._parse_column) 4438 else: 4439 expressions = self._parse_csv(self._parse_pivot_aggregation) 4440 4441 if not expressions: 4442 self.raise_error("Failed to parse PIVOT's aggregation list") 4443 4444 if not self._match(TokenType.FOR): 4445 self.raise_error("Expecting FOR") 4446 4447 fields = [] 4448 while True: 4449 field = self._try_parse(self._parse_pivot_in) 4450 if not field: 4451 break 4452 fields.append(field) 4453 4454 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4455 self._parse_bitwise 4456 ) 4457 4458 group = self._parse_group() 4459 4460 self._match_r_paren() 4461 4462 pivot = self.expression( 4463 exp.Pivot, 4464 expressions=expressions, 4465 fields=fields, 4466 unpivot=unpivot, 4467 include_nulls=include_nulls, 4468 
default_on_null=default_on_null, 4469 group=group, 4470 ) 4471 4472 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4473 pivot.set("alias", self._parse_table_alias()) 4474 4475 if not unpivot: 4476 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4477 4478 columns: t.List[exp.Expression] = [] 4479 all_fields = [] 4480 for pivot_field in pivot.fields: 4481 pivot_field_expressions = pivot_field.expressions 4482 4483 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4484 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4485 continue 4486 4487 all_fields.append( 4488 [ 4489 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4490 for fld in pivot_field_expressions 4491 ] 4492 ) 4493 4494 if all_fields: 4495 if names: 4496 all_fields.append(names) 4497 4498 # Generate all possible combinations of the pivot columns 4499 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4500 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4501 for fld_parts_tuple in itertools.product(*all_fields): 4502 fld_parts = list(fld_parts_tuple) 4503 4504 if names and self.PREFIXED_PIVOT_COLUMNS: 4505 # Move the "name" to the front of the list 4506 fld_parts.insert(0, fld_parts.pop(-1)) 4507 4508 columns.append(exp.to_identifier("_".join(fld_parts))) 4509 4510 pivot.set("columns", columns) 4511 4512 return pivot 4513 4514 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4515 return [agg.alias for agg in aggregations if agg.alias] 4516 4517 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4518 if not skip_where_token and not self._match(TokenType.PREWHERE): 4519 return None 4520 4521 return self.expression( 4522 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4523 ) 4524 4525 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4526 if not skip_where_token and not self._match(TokenType.WHERE): 4527 return None 4528 4529 return self.expression( 4530 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4531 ) 4532 4533 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4534 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4535 return None 4536 comments = self._prev_comments 4537 4538 elements: t.Dict[str, t.Any] = defaultdict(list) 4539 4540 if self._match(TokenType.ALL): 4541 elements["all"] = True 4542 elif self._match(TokenType.DISTINCT): 4543 elements["all"] = False 4544 4545 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4546 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4547 4548 while True: 4549 index = self._index 4550 4551 elements["expressions"].extend( 4552 self._parse_csv( 4553 lambda: None 4554 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4555 else self._parse_assignment() 4556 ) 4557 ) 4558 4559 before_with_index = self._index 4560 with_prefix = self._match(TokenType.WITH) 4561 4562 if self._match(TokenType.ROLLUP): 4563 elements["rollup"].append( 4564 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4565 ) 4566 elif self._match(TokenType.CUBE): 4567 elements["cube"].append( 4568 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4569 ) 4570 elif self._match(TokenType.GROUPING_SETS): 4571 elements["grouping_sets"].append( 4572 
self.expression( 4573 exp.GroupingSets, 4574 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4575 ) 4576 ) 4577 elif self._match_text_seq("TOTALS"): 4578 elements["totals"] = True # type: ignore 4579 4580 if before_with_index <= self._index <= before_with_index + 1: 4581 self._retreat(before_with_index) 4582 break 4583 4584 if index == self._index: 4585 break 4586 4587 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4588 4589 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4590 return self.expression( 4591 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4592 ) 4593 4594 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4595 if self._match(TokenType.L_PAREN): 4596 grouping_set = self._parse_csv(self._parse_column) 4597 self._match_r_paren() 4598 return self.expression(exp.Tuple, expressions=grouping_set) 4599 4600 return self._parse_column() 4601 4602 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4603 if not skip_having_token and not self._match(TokenType.HAVING): 4604 return None 4605 return self.expression( 4606 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4607 ) 4608 4609 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4610 if not self._match(TokenType.QUALIFY): 4611 return None 4612 return self.expression(exp.Qualify, this=self._parse_assignment()) 4613 4614 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4615 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4616 exp.Prior, this=self._parse_bitwise() 4617 ) 4618 connect = self._parse_assignment() 4619 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4620 return connect 4621 4622 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4623 if skip_start_token: 4624 start = None 4625 elif self._match(TokenType.START_WITH): 4626 start = self._parse_assignment() 4627 else: 4628 return None 4629 4630 self._match(TokenType.CONNECT_BY) 4631 nocycle = self._match_text_seq("NOCYCLE") 4632 connect = self._parse_connect_with_prior() 4633 4634 if not start and self._match(TokenType.START_WITH): 4635 start = self._parse_assignment() 4636 4637 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4638 4639 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4640 this = self._parse_id_var(any_token=True) 4641 if self._match(TokenType.ALIAS): 4642 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4643 return this 4644 4645 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4646 if self._match_text_seq("INTERPOLATE"): 4647 return self._parse_wrapped_csv(self._parse_name_as_expression) 4648 return None 4649 4650 def _parse_order( 4651 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4652 ) -> t.Optional[exp.Expression]: 4653 siblings = None 4654 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4655 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4656 return this 4657 4658 siblings = True 4659 4660 return self.expression( 4661 exp.Order, 4662 comments=self._prev_comments, 4663 this=this, 4664 expressions=self._parse_csv(self._parse_ordered), 4665 siblings=siblings, 4666 ) 4667 4668 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4669 if not self._match(token): 4670 return None 4671 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 4672 4673 def _parse_ordered( 4674 self, parse_method: t.Optional[t.Callable] = None 4675 ) -> t.Optional[exp.Ordered]: 4676 this = parse_method() if parse_method else self._parse_assignment() 4677 if not this: 4678 return None 4679 4680 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4681 this = exp.var("ALL") 4682 4683 asc = self._match(TokenType.ASC) 4684 desc = self._match(TokenType.DESC) or (asc and False) 4685 4686 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4687 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4688 4689 nulls_first = is_nulls_first or False 4690 explicitly_null_ordered = is_nulls_first or is_nulls_last 4691 4692 if ( 4693 not explicitly_null_ordered 4694 and ( 4695 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4696 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4697 ) 4698 and self.dialect.NULL_ORDERING != "nulls_are_last" 4699 ): 4700 nulls_first = True 4701 4702 if self._match_text_seq("WITH", "FILL"): 4703 with_fill = self.expression( 4704 exp.WithFill, 4705 **{ # type: ignore 4706 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4707 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4708 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4709 "interpolate": self._parse_interpolate(), 4710 }, 4711 ) 4712 else: 4713 with_fill = None 4714 4715 return self.expression( 4716 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4717 ) 4718 4719 def _parse_limit_options(self) -> exp.LimitOptions: 4720 percent = self._match(TokenType.PERCENT) 4721 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4722 self._match_text_seq("ONLY") 4723 with_ties = self._match_text_seq("WITH", "TIES") 4724 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4725 4726 def _parse_limit( 4727 self, 4728 this: t.Optional[exp.Expression] = None, 4729 top: bool = False, 4730 skip_limit_token: bool = False, 4731 ) -> t.Optional[exp.Expression]: 4732 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4733 comments = self._prev_comments 4734 if top: 4735 limit_paren = self._match(TokenType.L_PAREN) 4736 expression = self._parse_term() if limit_paren else self._parse_number() 4737 4738 if limit_paren: 4739 self._match_r_paren() 4740 4741 limit_options = self._parse_limit_options() 4742 else: 4743 limit_options = None 4744 expression = self._parse_term() 4745 4746 if self._match(TokenType.COMMA): 4747 offset = expression 4748 expression = self._parse_term() 4749 else: 4750 offset = None 4751 4752 limit_exp = self.expression( 4753 exp.Limit, 4754 this=this, 4755 expression=expression, 4756 offset=offset, 4757 comments=comments, 4758 limit_options=limit_options, 4759 expressions=self._parse_limit_by(), 4760 ) 4761 4762 return limit_exp 4763 4764 if self._match(TokenType.FETCH): 4765 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4766 direction = self._prev.text.upper() if direction else "FIRST" 4767 4768 count = self._parse_field(tokens=self.FETCH_TOKENS) 4769 4770 return self.expression( 4771 exp.Fetch, 4772 direction=direction, 4773 count=count, 4774 limit_options=self._parse_limit_options(), 4775 ) 4776 4777 return this 4778 4779 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4780 if not self._match(TokenType.OFFSET): 4781 return this 4782 4783 count = self._parse_term() 4784 
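# Standard SQL allows a trailing ROW/ROWS keyword (e.g. OFFSET 5 ROWS); it is matched below and discarded.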
self._match_set((TokenType.ROW, TokenType.ROWS)) 4785 4786 return self.expression( 4787 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4788 ) 4789 4790 def _can_parse_limit_or_offset(self) -> bool: 4791 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4792 return False 4793 4794 index = self._index 4795 result = bool( 4796 self._try_parse(self._parse_limit, retreat=True) 4797 or self._try_parse(self._parse_offset, retreat=True) 4798 ) 4799 self._retreat(index) 4800 return result 4801 4802 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4803 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4804 4805 def _parse_locks(self) -> t.List[exp.Lock]: 4806 locks = [] 4807 while True: 4808 update, key = None, None 4809 if self._match_text_seq("FOR", "UPDATE"): 4810 update = True 4811 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4812 "LOCK", "IN", "SHARE", "MODE" 4813 ): 4814 update = False 4815 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4816 update, key = False, True 4817 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4818 update, key = True, True 4819 else: 4820 break 4821 4822 expressions = None 4823 if self._match_text_seq("OF"): 4824 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4825 4826 wait: t.Optional[bool | exp.Expression] = None 4827 if self._match_text_seq("NOWAIT"): 4828 wait = True 4829 elif self._match_text_seq("WAIT"): 4830 wait = self._parse_primary() 4831 elif self._match_text_seq("SKIP", "LOCKED"): 4832 wait = False 4833 4834 locks.append( 4835 self.expression( 4836 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4837 ) 4838 ) 4839 4840 return locks 4841 4842 def parse_set_operation( 4843 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4844 ) -> t.Optional[exp.Expression]: 4845 start = self._index 4846 _, side_token, kind_token = self._parse_join_parts() 4847 4848 side = side_token.text if side_token else None 4849 kind = kind_token.text if kind_token else None 4850 4851 if not self._match_set(self.SET_OPERATIONS): 4852 self._retreat(start) 4853 return None 4854 4855 token_type = self._prev.token_type 4856 4857 if token_type == TokenType.UNION: 4858 operation: t.Type[exp.SetOperation] = exp.Union 4859 elif token_type == TokenType.EXCEPT: 4860 operation = exp.Except 4861 else: 4862 operation = exp.Intersect 4863 4864 comments = self._prev.comments 4865 4866 if self._match(TokenType.DISTINCT): 4867 distinct: t.Optional[bool] = True 4868 elif self._match(TokenType.ALL): 4869 distinct = False 4870 else: 4871 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4872 if distinct is None: 4873 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4874 4875 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4876 "STRICT", "CORRESPONDING" 4877 ) 4878 if self._match_text_seq("CORRESPONDING"): 4879 by_name = True 4880 if not side and not kind: 4881 kind = "INNER" 4882 4883 on_column_list = None 4884 if by_name and self._match_texts(("ON", "BY")): 4885 on_column_list = self._parse_wrapped_csv(self._parse_column) 4886 4887 expression = self._parse_select( 4888 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4889 ) 4890 4891 return self.expression( 4892 operation, 4893 comments=comments, 4894 this=this, 4895 distinct=distinct, 4896 by_name=by_name, 4897 expression=expression, 4898 side=side, 4899 kind=kind, 4900 on=on_column_list, 4901 ) 4902 
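# For illustration, the helper below chains set operations left-to-right, so
#   SELECT 1 UNION SELECT 2 UNION SELECT 3
# parses as Union(this=Union(this=Select, expression=Select), expression=Select).
# An assumed usage sketch (requires sqlglot to be installed):
#   import sqlglot
#   tree = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION SELECT 3")
#   inner = tree.this  # expected to be the exp.Union combining the first two selects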
4903 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4904 while this: 4905 setop = self.parse_set_operation(this) 4906 if not setop: 4907 break 4908 this = setop 4909 4910 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4911 expression = this.expression 4912 4913 if expression: 4914 for arg in self.SET_OP_MODIFIERS: 4915 expr = expression.args.get(arg) 4916 if expr: 4917 this.set(arg, expr.pop()) 4918 4919 return this 4920 4921 def _parse_expression(self) -> t.Optional[exp.Expression]: 4922 return self._parse_alias(self._parse_assignment()) 4923 4924 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4925 this = self._parse_disjunction() 4926 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4927 # This allows us to parse <non-identifier token> := <expr> 4928 this = exp.column( 4929 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4930 ) 4931 4932 while self._match_set(self.ASSIGNMENT): 4933 if isinstance(this, exp.Column) and len(this.parts) == 1: 4934 this = this.this 4935 4936 this = self.expression( 4937 self.ASSIGNMENT[self._prev.token_type], 4938 this=this, 4939 comments=self._prev_comments, 4940 expression=self._parse_assignment(), 4941 ) 4942 4943 return this 4944 4945 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4946 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4947 4948 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4949 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4950 4951 def _parse_equality(self) -> t.Optional[exp.Expression]: 4952 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4953 4954 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4955 return self._parse_tokens(self._parse_range, self.COMPARISON) 4956 4957 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4958 this = this or self._parse_bitwise() 4959 negate = self._match(TokenType.NOT) 4960 4961 if self._match_set(self.RANGE_PARSERS): 4962 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4963 if not expression: 4964 return this 4965 4966 this = expression 4967 elif self._match(TokenType.ISNULL): 4968 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4969 4970 # Postgres supports ISNULL and NOTNULL for conditions. 
4971 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4972 if self._match(TokenType.NOTNULL): 4973 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4974 this = self.expression(exp.Not, this=this) 4975 4976 if negate: 4977 this = self._negate_range(this) 4978 4979 if self._match(TokenType.IS): 4980 this = self._parse_is(this) 4981 4982 return this 4983 4984 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4985 if not this: 4986 return this 4987 4988 return self.expression(exp.Not, this=this) 4989 4990 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4991 index = self._index - 1 4992 negate = self._match(TokenType.NOT) 4993 4994 if self._match_text_seq("DISTINCT", "FROM"): 4995 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4996 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4997 4998 if self._match(TokenType.JSON): 4999 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5000 5001 if self._match_text_seq("WITH"): 5002 _with = True 5003 elif self._match_text_seq("WITHOUT"): 5004 _with = False 5005 else: 5006 _with = None 5007 5008 unique = self._match(TokenType.UNIQUE) 5009 self._match_text_seq("KEYS") 5010 expression: t.Optional[exp.Expression] = self.expression( 5011 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5012 ) 5013 else: 5014 expression = self._parse_primary() or self._parse_null() 5015 if not expression: 5016 self._retreat(index) 5017 return None 5018 5019 this = self.expression(exp.Is, this=this, expression=expression) 5020 return self.expression(exp.Not, this=this) if negate else this 5021 5022 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5023 unnest = self._parse_unnest(with_alias=False) 5024 if unnest: 5025 this = self.expression(exp.In, this=this, unnest=unnest) 5026 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5027 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5028 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5029 5030 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5031 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5032 else: 5033 this = self.expression(exp.In, this=this, expressions=expressions) 5034 5035 if matched_l_paren: 5036 self._match_r_paren(this) 5037 elif not self._match(TokenType.R_BRACKET, expression=this): 5038 self.raise_error("Expecting ]") 5039 else: 5040 this = self.expression(exp.In, this=this, field=self._parse_column()) 5041 5042 return this 5043 5044 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5045 symmetric = None 5046 if self._match_text_seq("SYMMETRIC"): 5047 symmetric = True 5048 elif self._match_text_seq("ASYMMETRIC"): 5049 symmetric = False 5050 5051 low = self._parse_bitwise() 5052 self._match(TokenType.AND) 5053 high = self._parse_bitwise() 5054 5055 return self.expression( 5056 exp.Between, 5057 this=this, 5058 low=low, 5059 high=high, 5060 symmetric=symmetric, 5061 ) 5062 5063 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5064 if not self._match(TokenType.ESCAPE): 5065 return this 5066 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5067 5068 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5069 index = self._index 5070 5071 if not 
self._match(TokenType.INTERVAL) and match_interval: 5072 return None 5073 5074 if self._match(TokenType.STRING, advance=False): 5075 this = self._parse_primary() 5076 else: 5077 this = self._parse_term() 5078 5079 if not this or ( 5080 isinstance(this, exp.Column) 5081 and not this.table 5082 and not this.this.quoted 5083 and this.name.upper() == "IS" 5084 ): 5085 self._retreat(index) 5086 return None 5087 5088 unit = self._parse_function() or ( 5089 not self._match(TokenType.ALIAS, advance=False) 5090 and self._parse_var(any_token=True, upper=True) 5091 ) 5092 5093 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5094 # each INTERVAL expression into this canonical form so it's easy to transpile 5095 if this and this.is_number: 5096 this = exp.Literal.string(this.to_py()) 5097 elif this and this.is_string: 5098 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5099 if parts and unit: 5100 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5101 unit = None 5102 self._retreat(self._index - 1) 5103 5104 if len(parts) == 1: 5105 this = exp.Literal.string(parts[0][0]) 5106 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5107 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5108 unit = self.expression( 5109 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5110 ) 5111 5112 interval = self.expression(exp.Interval, this=this, unit=unit) 5113 5114 index = self._index 5115 self._match(TokenType.PLUS) 5116 5117 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5118 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5119 return self.expression( 5120 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5121 ) 5122 5123 self._retreat(index) 5124 return interval 5125 5126 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5127 this = self._parse_term() 5128 5129 while True: 5130 if self._match_set(self.BITWISE): 5131 this = self.expression( 5132 self.BITWISE[self._prev.token_type], 5133 this=this, 5134 expression=self._parse_term(), 5135 ) 5136 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5137 this = self.expression( 5138 exp.DPipe, 5139 this=this, 5140 expression=self._parse_term(), 5141 safe=not self.dialect.STRICT_STRING_CONCAT, 5142 ) 5143 elif self._match(TokenType.DQMARK): 5144 this = self.expression( 5145 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5146 ) 5147 elif self._match_pair(TokenType.LT, TokenType.LT): 5148 this = self.expression( 5149 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5150 ) 5151 elif self._match_pair(TokenType.GT, TokenType.GT): 5152 this = self.expression( 5153 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5154 ) 5155 else: 5156 break 5157 5158 return this 5159 5160 def _parse_term(self) -> t.Optional[exp.Expression]: 5161 this = self._parse_factor() 5162 5163 while self._match_set(self.TERM): 5164 klass = self.TERM[self._prev.token_type] 5165 comments = self._prev_comments 5166 expression = self._parse_factor() 5167 5168 this = self.expression(klass, this=this, comments=comments, expression=expression) 5169 5170 if isinstance(this, exp.Collate): 5171 expr = this.expression 5172 5173 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5174 # fallback to Identifier / Var 5175 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5176 ident = expr.this 5177 if 
isinstance(ident, exp.Identifier): 5178 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5179 5180 return this 5181 5182 def _parse_factor(self) -> t.Optional[exp.Expression]: 5183 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5184 this = parse_method() 5185 5186 while self._match_set(self.FACTOR): 5187 klass = self.FACTOR[self._prev.token_type] 5188 comments = self._prev_comments 5189 expression = parse_method() 5190 5191 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5192 self._retreat(self._index - 1) 5193 return this 5194 5195 this = self.expression(klass, this=this, comments=comments, expression=expression) 5196 5197 if isinstance(this, exp.Div): 5198 this.args["typed"] = self.dialect.TYPED_DIVISION 5199 this.args["safe"] = self.dialect.SAFE_DIVISION 5200 5201 return this 5202 5203 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5204 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5205 5206 def _parse_unary(self) -> t.Optional[exp.Expression]: 5207 if self._match_set(self.UNARY_PARSERS): 5208 return self.UNARY_PARSERS[self._prev.token_type](self) 5209 return self._parse_at_time_zone(self._parse_type()) 5210 5211 def _parse_type( 5212 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5213 ) -> t.Optional[exp.Expression]: 5214 interval = parse_interval and self._parse_interval() 5215 if interval: 5216 return interval 5217 5218 index = self._index 5219 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5220 5221 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5222 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5223 if isinstance(data_type, exp.Cast): 5224 # This constructor can contain ops directly after it, for instance struct unnesting: 5225 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5226 return self._parse_column_ops(data_type) 5227 5228 if data_type: 5229 index2 = self._index 5230 this = self._parse_primary() 5231 5232 if isinstance(this, exp.Literal): 5233 literal = this.name 5234 this = self._parse_column_ops(this) 5235 5236 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5237 if parser: 5238 return parser(self, this, data_type) 5239 5240 if ( 5241 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5242 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5243 and TIME_ZONE_RE.search(literal) 5244 ): 5245 data_type = exp.DataType.build("TIMESTAMPTZ") 5246 5247 return self.expression(exp.Cast, this=this, to=data_type) 5248 5249 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5250 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5251 # 5252 # If the index difference here is greater than 1, that means the parser itself must have 5253 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5254 # 5255 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5256 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5257 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5258 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5259 # 5260 # In these cases, we don't really want to return the converted type, but instead retreat 5261 # and try to parse a Column or Identifier in the section below.
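# Illustrative sketch (added for clarity, not part of the original source; uses
# only the public sqlglot API): the same word can resolve to a type or a column
# depending on what follows, e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0))").sql()
#   'SELECT CAST(x AS DECIMAL(38, 0))'
# whereas in "SELECT decimal FROM t" the retreat below makes DECIMAL come back
# as a plain exp.Column instead of an exp.DataType.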
5262 if data_type.expressions and index2 - index > 1: 5263 self._retreat(index2) 5264 return self._parse_column_ops(data_type) 5265 5266 self._retreat(index) 5267 5268 if fallback_to_identifier: 5269 return self._parse_id_var() 5270 5271 this = self._parse_column() 5272 return this and self._parse_column_ops(this) 5273 5274 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5275 this = self._parse_type() 5276 if not this: 5277 return None 5278 5279 if isinstance(this, exp.Column) and not this.table: 5280 this = exp.var(this.name.upper()) 5281 5282 return self.expression( 5283 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5284 ) 5285 5286 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5287 type_name = identifier.name 5288 5289 while self._match(TokenType.DOT): 5290 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5291 5292 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5293 5294 def _parse_types( 5295 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5296 ) -> t.Optional[exp.Expression]: 5297 index = self._index 5298 5299 this: t.Optional[exp.Expression] = None 5300 prefix = self._match_text_seq("SYSUDTLIB", ".") 5301 5302 if not self._match_set(self.TYPE_TOKENS): 5303 identifier = allow_identifiers and self._parse_id_var( 5304 any_token=False, tokens=(TokenType.VAR,) 5305 ) 5306 if isinstance(identifier, exp.Identifier): 5307 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5308 5309 if len(tokens) != 1: 5310 self.raise_error("Unexpected identifier", self._prev) 5311 5312 if tokens[0].token_type in self.TYPE_TOKENS: 5313 self._prev = tokens[0] 5314 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5315 this = self._parse_user_defined_type(identifier) 5316 else: 5317 self._retreat(self._index - 1) 5318 return None 5319 else: 5320 return None 5321 5322 type_token = self._prev.token_type 5323 5324 if type_token == TokenType.PSEUDO_TYPE: 5325 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5326 5327 if type_token == TokenType.OBJECT_IDENTIFIER: 5328 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5329 5330 # https://materialize.com/docs/sql/types/map/ 5331 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5332 key_type = self._parse_types( 5333 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5334 ) 5335 if not self._match(TokenType.FARROW): 5336 self._retreat(index) 5337 return None 5338 5339 value_type = self._parse_types( 5340 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5341 ) 5342 if not self._match(TokenType.R_BRACKET): 5343 self._retreat(index) 5344 return None 5345 5346 return exp.DataType( 5347 this=exp.DataType.Type.MAP, 5348 expressions=[key_type, value_type], 5349 nested=True, 5350 prefix=prefix, 5351 ) 5352 5353 nested = type_token in self.NESTED_TYPE_TOKENS 5354 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5355 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5356 expressions = None 5357 maybe_func = False 5358 5359 if self._match(TokenType.L_PAREN): 5360 if is_struct: 5361 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5362 elif nested: 5363 expressions = self._parse_csv( 5364 lambda: self._parse_types( 5365 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5366 ) 5367 ) 5368 if type_token == TokenType.NULLABLE 
and len(expressions) == 1: 5369 this = expressions[0] 5370 this.set("nullable", True) 5371 self._match_r_paren() 5372 return this 5373 elif type_token in self.ENUM_TYPE_TOKENS: 5374 expressions = self._parse_csv(self._parse_equality) 5375 elif is_aggregate: 5376 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5377 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5378 ) 5379 if not func_or_ident: 5380 return None 5381 expressions = [func_or_ident] 5382 if self._match(TokenType.COMMA): 5383 expressions.extend( 5384 self._parse_csv( 5385 lambda: self._parse_types( 5386 check_func=check_func, 5387 schema=schema, 5388 allow_identifiers=allow_identifiers, 5389 ) 5390 ) 5391 ) 5392 else: 5393 expressions = self._parse_csv(self._parse_type_size) 5394 5395 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5396 if type_token == TokenType.VECTOR and len(expressions) == 2: 5397 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5398 5399 if not expressions or not self._match(TokenType.R_PAREN): 5400 self._retreat(index) 5401 return None 5402 5403 maybe_func = True 5404 5405 values: t.Optional[t.List[exp.Expression]] = None 5406 5407 if nested and self._match(TokenType.LT): 5408 if is_struct: 5409 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5410 else: 5411 expressions = self._parse_csv( 5412 lambda: self._parse_types( 5413 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5414 ) 5415 ) 5416 5417 if not self._match(TokenType.GT): 5418 self.raise_error("Expecting >") 5419 5420 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5421 values = self._parse_csv(self._parse_assignment) 5422 if not values and is_struct: 5423 values = None 5424 self._retreat(self._index - 1) 5425 else: 5426 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5427 5428 if type_token in self.TIMESTAMPS: 5429 if self._match_text_seq("WITH", "TIME", "ZONE"): 5430 maybe_func = False 5431 tz_type = ( 5432 exp.DataType.Type.TIMETZ 5433 if type_token in self.TIMES 5434 else exp.DataType.Type.TIMESTAMPTZ 5435 ) 5436 this = exp.DataType(this=tz_type, expressions=expressions) 5437 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5438 maybe_func = False 5439 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5440 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5441 maybe_func = False 5442 elif type_token == TokenType.INTERVAL: 5443 unit = self._parse_var(upper=True) 5444 if unit: 5445 if self._match_text_seq("TO"): 5446 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5447 5448 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5449 else: 5450 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5451 elif type_token == TokenType.VOID: 5452 this = exp.DataType(this=exp.DataType.Type.NULL) 5453 5454 if maybe_func and check_func: 5455 index2 = self._index 5456 peek = self._parse_string() 5457 5458 if not peek: 5459 self._retreat(index) 5460 return None 5461 5462 self._retreat(index2) 5463 5464 if not this: 5465 if self._match_text_seq("UNSIGNED"): 5466 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5467 if not unsigned_type_token: 5468 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5469 5470 type_token = unsigned_type_token or type_token 5471 5472 this = exp.DataType( 5473 this=exp.DataType.Type[type_token.value], 5474 
expressions=expressions, 5475 nested=nested, 5476 prefix=prefix, 5477 ) 5478 5479 # Empty arrays/structs are allowed 5480 if values is not None: 5481 cls = exp.Struct if is_struct else exp.Array 5482 this = exp.cast(cls(expressions=values), this, copy=False) 5483 5484 elif expressions: 5485 this.set("expressions", expressions) 5486 5487 # https://materialize.com/docs/sql/types/list/#type-name 5488 while self._match(TokenType.LIST): 5489 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5490 5491 index = self._index 5492 5493 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5494 matched_array = self._match(TokenType.ARRAY) 5495 5496 while self._curr: 5497 datatype_token = self._prev.token_type 5498 matched_l_bracket = self._match(TokenType.L_BRACKET) 5499 5500 if (not matched_l_bracket and not matched_array) or ( 5501 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5502 ): 5503 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5504 # not to be confused with the fixed size array parsing 5505 break 5506 5507 matched_array = False 5508 values = self._parse_csv(self._parse_assignment) or None 5509 if ( 5510 values 5511 and not schema 5512 and ( 5513 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5514 ) 5515 ): 5516 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5517 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5518 self._retreat(index) 5519 break 5520 5521 this = exp.DataType( 5522 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5523 ) 5524 self._match(TokenType.R_BRACKET) 5525 5526 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5527 converter = self.TYPE_CONVERTERS.get(this.this) 5528 if converter: 5529 this = converter(t.cast(exp.DataType, this)) 5530 5531 return this 5532 5533 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5534 index = self._index 5535 5536 if ( 5537 self._curr 5538 and self._next 5539 and self._curr.token_type in self.TYPE_TOKENS 5540 and self._next.token_type in self.TYPE_TOKENS 5541 ): 5542 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5543 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5544 this = self._parse_id_var() 5545 else: 5546 this = ( 5547 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5548 or self._parse_id_var() 5549 ) 5550 5551 self._match(TokenType.COLON) 5552 5553 if ( 5554 type_required 5555 and not isinstance(this, exp.DataType) 5556 and not self._match_set(self.TYPE_TOKENS, advance=False) 5557 ): 5558 self._retreat(index) 5559 return self._parse_types() 5560 5561 return self._parse_column_def(this) 5562 5563 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5564 if not self._match_text_seq("AT", "TIME", "ZONE"): 5565 return this 5566 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5567 5568 def _parse_column(self) -> t.Optional[exp.Expression]: 5569 this = self._parse_column_reference() 5570 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5571 5572 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5573 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5574 5575 return column 5576 5577 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5578 this = self._parse_field() 5579 if ( 5580 not this 5581 and self._match(TokenType.VALUES, advance=False) 5582 and self.VALUES_FOLLOWED_BY_PAREN 5583 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5584 ): 5585 this = self._parse_id_var() 5586 5587 if isinstance(this, exp.Identifier): 5588 # We bubble up comments from the Identifier to the Column 5589 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5590 5591 return this 5592 5593 def _parse_colon_as_variant_extract( 5594 self, this: t.Optional[exp.Expression] 5595 ) -> t.Optional[exp.Expression]: 5596 casts = [] 5597 json_path = [] 5598 escape = None 5599 5600 while self._match(TokenType.COLON): 5601 start_index = self._index 5602 5603 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5604 path = self._parse_column_ops( 5605 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5606 ) 5607 5608 # The cast :: operator has a lower precedence than the extraction operator :, so 5609 # we rearrange the AST appropriately to avoid casting the JSON path 5610 while isinstance(path, exp.Cast): 5611 casts.append(path.to) 5612 path = path.this 5613 5614 if casts: 5615 dcolon_offset = next( 5616 i 5617 for i, t in enumerate(self._tokens[start_index:]) 5618 if t.token_type == TokenType.DCOLON 5619 ) 5620 end_token = self._tokens[start_index + dcolon_offset - 1] 5621 else: 5622 end_token = self._prev 5623 5624 if path: 5625 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5626 # it'll roundtrip to a string literal in GET_PATH 5627 if isinstance(path, exp.Identifier) and path.quoted: 5628 escape = True 5629 5630 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5631 5632 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5633 # Databricks transforms it back to the colon/dot notation 5634 if json_path: 5635 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5636 5637 if json_path_expr: 5638 json_path_expr.set("escape", escape) 5639 5640 this = self.expression( 5641 exp.JSONExtract, 5642 this=this, 5643 expression=json_path_expr, 5644 variant_extract=True, 5645 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5646 ) 5647 5648 while casts: 5649 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5650 5651 return this 5652 5653 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5654 return self._parse_types() 5655 5656 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5657 this = self._parse_bracket(this) 5658 5659 while self._match_set(self.COLUMN_OPERATORS): 5660 op_token = self._prev.token_type 5661 op = self.COLUMN_OPERATORS.get(op_token) 5662 5663 if op_token in self.CAST_COLUMN_OPERATORS: 5664 field = self._parse_dcolon() 5665 if not field: 5666 self.raise_error("Expected type") 5667 elif op and self._curr: 5668 field = self._parse_column_reference() or self._parse_bracket() 5669 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5670 field = self._parse_column_ops(field) 5671 else: 5672 field = self._parse_field(any_token=True, anonymous_func=True) 5673 5674 # Function calls can be qualified, e.g., x.y.FOO() 5675 # This converts the final AST to a series of Dots leading to the function call 5676 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5677 if isinstance(field, (exp.Func, exp.Window)) and this: 5678 this = this.transform( 5679 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5680 ) 5681 5682 if op: 5683 this = op(self, this, field) 5684 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5685 this = self.expression( 5686 exp.Column, 5687 comments=this.comments, 5688 this=field, 5689 table=this.this, 5690 db=this.args.get("table"), 5691 catalog=this.args.get("db"), 5692 ) 5693 elif isinstance(field, exp.Window): 5694 # Move the exp.Dot's to the window's function 5695 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5696 field.set("this", window_func) 5697 this = field 5698 else: 5699 this = self.expression(exp.Dot, this=this, expression=field) 5700 5701 if field and field.comments: 5702 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5703 5704 this = self._parse_bracket(this) 5705 5706 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5707 5708 def _parse_paren(self) -> t.Optional[exp.Expression]: 5709 if not self._match(TokenType.L_PAREN): 5710 return None 5711 5712 comments = self._prev_comments 5713 query = self._parse_select() 5714 5715 if query: 5716 expressions = [query] 5717 else: 5718 expressions = self._parse_expressions() 5719 5720 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5721 5722 if not this and self._match(TokenType.R_PAREN, advance=False): 5723 this = self.expression(exp.Tuple) 5724 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5725 this = self._parse_subquery(this=this, parse_alias=False) 5726 elif isinstance(this, exp.Subquery): 5727 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5728 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5729 this = self.expression(exp.Tuple, expressions=expressions) 5730 else: 5731 this = self.expression(exp.Paren, this=this) 5732 5733 if this: 5734 this.add_comments(comments) 5735 5736 self._match_r_paren(expression=this) 5737 return this 5738 5739 def _parse_primary(self) -> t.Optional[exp.Expression]: 5740 if self._match_set(self.PRIMARY_PARSERS): 5741 token_type = self._prev.token_type 5742 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5743 5744 if token_type == TokenType.STRING: 5745 expressions = [primary] 5746 while self._match(TokenType.STRING): 5747 expressions.append(exp.Literal.string(self._prev.text)) 5748 5749 if len(expressions) > 1: 5750 return self.expression(exp.Concat, expressions=expressions) 5751 5752 return primary 5753 5754 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5755 return exp.Literal.number(f"0.{self._prev.text}") 5756 5757 return self._parse_paren() 5758 5759 def _parse_field( 5760 self, 5761 any_token: bool = False, 5762 tokens: t.Optional[t.Collection[TokenType]] = None, 5763 anonymous_func: bool = False, 5764 ) -> t.Optional[exp.Expression]: 5765 if anonymous_func: 5766 field = ( 5767 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5768 or self._parse_primary() 5769 ) 5770 else: 5771 field = self._parse_primary() or self._parse_function( 5772 anonymous=anonymous_func, any_token=any_token 5773 ) 5774 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5775 5776 def _parse_function( 5777 self, 5778 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5779 anonymous: bool = False, 5780 optional_parens: bool = True, 5781 any_token: bool = False, 5782 ) -> t.Optional[exp.Expression]: 5783 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5784 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5785 fn_syntax = False 5786 if ( 5787 self._match(TokenType.L_BRACE, advance=False) 5788 and self._next 5789 and self._next.text.upper() == "FN" 5790 ): 5791 self._advance(2) 5792 fn_syntax = True 5793 5794 func = self._parse_function_call( 5795 functions=functions, 5796 anonymous=anonymous, 5797 optional_parens=optional_parens, 5798 any_token=any_token, 5799 ) 5800 5801 if fn_syntax: 5802 self._match(TokenType.R_BRACE) 5803 5804 return func 5805 5806 def _parse_function_call( 5807 self, 5808 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5809 anonymous: bool = False, 5810 optional_parens: bool = True, 5811 any_token: bool = False, 5812 ) -> t.Optional[exp.Expression]: 5813 if not self._curr: 5814 return None 5815 5816 comments = self._curr.comments 5817 prev = self._prev 5818 token = self._curr 5819 token_type = self._curr.token_type 5820 this = self._curr.text 5821 upper = this.upper() 5822 5823 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5824 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5825 self._advance() 5826 return self._parse_window(parser(self)) 5827 5828 if not self._next or self._next.token_type != TokenType.L_PAREN: 5829 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5830 self._advance() 5831 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5832 5833 return None 5834 5835 if 
any_token: 5836 if token_type in self.RESERVED_TOKENS: 5837 return None 5838 elif token_type not in self.FUNC_TOKENS: 5839 return None 5840 5841 self._advance(2) 5842 5843 parser = self.FUNCTION_PARSERS.get(upper) 5844 if parser and not anonymous: 5845 this = parser(self) 5846 else: 5847 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5848 5849 if subquery_predicate: 5850 expr = None 5851 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5852 expr = self._parse_select() 5853 self._match_r_paren() 5854 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5855 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5856 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5857 self._advance(-1) 5858 expr = self._parse_bitwise() 5859 5860 if expr: 5861 return self.expression(subquery_predicate, comments=comments, this=expr) 5862 5863 if functions is None: 5864 functions = self.FUNCTIONS 5865 5866 function = functions.get(upper) 5867 known_function = function and not anonymous 5868 5869 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5870 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5871 5872 post_func_comments = self._curr and self._curr.comments 5873 if known_function and post_func_comments: 5874 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5875 # call we'll construct it as exp.Anonymous, even if it's "known" 5876 if any( 5877 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5878 for comment in post_func_comments 5879 ): 5880 known_function = False 5881 5882 if alias and known_function: 5883 args = self._kv_to_prop_eq(args) 5884 5885 if known_function: 5886 func_builder = t.cast(t.Callable, function) 5887 5888 if "dialect" in func_builder.__code__.co_varnames: 5889 func = func_builder(args, dialect=self.dialect) 5890 else: 5891 func = func_builder(args) 5892 5893 func = self.validate_expression(func, args) 5894 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5895 func.meta["name"] = this 5896 5897 this = func 5898 else: 5899 if token_type == TokenType.IDENTIFIER: 5900 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5901 5902 this = self.expression(exp.Anonymous, this=this, expressions=args) 5903 this = this.update_positions(token) 5904 5905 if isinstance(this, exp.Expression): 5906 this.add_comments(comments) 5907 5908 self._match_r_paren(this) 5909 return self._parse_window(this) 5910 5911 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5912 return expression 5913 5914 def _kv_to_prop_eq( 5915 self, expressions: t.List[exp.Expression], parse_map: bool = False 5916 ) -> t.List[exp.Expression]: 5917 transformed = [] 5918 5919 for index, e in enumerate(expressions): 5920 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5921 if isinstance(e, exp.Alias): 5922 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5923 5924 if not isinstance(e, exp.PropertyEQ): 5925 e = self.expression( 5926 exp.PropertyEQ, 5927 this=e.this if parse_map else exp.to_identifier(e.this.name), 5928 expression=e.expression, 5929 ) 5930 5931 if isinstance(e.this, exp.Column): 5932 e.this.replace(e.this.this) 5933 else: 5934 e = self._to_prop_eq(e, index) 5935 5936 transformed.append(e) 5937 5938 return transformed 5939 5940 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5941 return self._parse_statement() 5942 5943 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5944 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5945 5946 def _parse_user_defined_function( 5947 self, kind: t.Optional[TokenType] = None 5948 ) -> t.Optional[exp.Expression]: 5949 this = self._parse_table_parts(schema=True) 5950 5951 if not self._match(TokenType.L_PAREN): 5952 return this 5953 5954 expressions = self._parse_csv(self._parse_function_parameter) 5955 self._match_r_paren() 5956 return self.expression( 5957 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5958 ) 5959 5960 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5961 literal = self._parse_primary() 5962 if literal: 5963 return self.expression(exp.Introducer, this=token.text, expression=literal) 5964 5965 return self._identifier_expression(token) 5966 5967 def _parse_session_parameter(self) -> exp.SessionParameter: 5968 kind = None 5969 this = self._parse_id_var() or self._parse_primary() 5970 5971 if this and self._match(TokenType.DOT): 5972 kind = this.name 5973 this = self._parse_var() or self._parse_primary() 5974 5975 return self.expression(exp.SessionParameter, this=this, kind=kind) 5976 5977 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5978 return self._parse_id_var() 5979 5980 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5981 index = self._index 5982 5983 if self._match(TokenType.L_PAREN): 5984 expressions = t.cast( 5985 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5986 ) 5987 5988 if not self._match(TokenType.R_PAREN): 5989 self._retreat(index) 5990 else: 5991 expressions = [self._parse_lambda_arg()] 5992 5993 if self._match_set(self.LAMBDAS): 5994 return self.LAMBDAS[self._prev.token_type](self, expressions) 5995 5996 self._retreat(index) 5997 5998 this: t.Optional[exp.Expression] 5999 6000 if self._match(TokenType.DISTINCT): 6001 this = self.expression( 6002 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6003 ) 6004 else: 6005 this = self._parse_select_or_expression(alias=alias) 6006 6007 return self._parse_limit( 6008 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6009 ) 6010 6011 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6012 index = self._index 6013 if not self._match(TokenType.L_PAREN): 6014 return this 6015 6016 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6017 # expr can be of both types 6018 if self._match_set(self.SELECT_START_TOKENS): 6019 self._retreat(index) 6020 return this 6021 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6022 self._match_r_paren() 6023 return self.expression(exp.Schema, this=this, expressions=args) 6024 6025 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6026 return self._parse_column_def(self._parse_field(any_token=True)) 6027 6028 def _parse_column_def( 6029 self, this: t.Optional[exp.Expression], computed_column: bool = True 6030 ) -> t.Optional[exp.Expression]: 6031 # column defs are not really columns, they're identifiers 6032 if isinstance(this, exp.Column): 6033 this = this.this 6034 6035 if not computed_column: 6036 self._match(TokenType.ALIAS) 6037 6038 kind = self._parse_types(schema=True) 6039 6040 if self._match_text_seq("FOR", "ORDINALITY"): 6041 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6042 6043 constraints: t.List[exp.Expression] = [] 6044 6045 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6046 ("ALIAS", "MATERIALIZED") 6047 ): 6048 persisted = self._prev.text.upper() == "MATERIALIZED" 6049 constraint_kind = exp.ComputedColumnConstraint( 6050 this=self._parse_assignment(), 6051 persisted=persisted or self._match_text_seq("PERSISTED"), 6052 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6053 ) 6054 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6055 elif ( 6056 kind 6057 and self._match(TokenType.ALIAS, advance=False) 6058 and ( 6059 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6060 or (self._next and self._next.token_type == TokenType.L_PAREN) 6061 ) 6062 ): 6063 self._advance() 6064 constraints.append( 6065 self.expression( 6066 exp.ColumnConstraint, 6067 kind=exp.ComputedColumnConstraint( 6068 this=self._parse_disjunction(), 6069 persisted=self._match_texts(("STORED", "VIRTUAL")) 6070 and self._prev.text.upper() == "STORED", 6071 ), 6072 ) 6073 ) 6074 6075 while True: 6076 constraint = self._parse_column_constraint() 6077 if not constraint: 6078 break 6079 constraints.append(constraint) 6080 6081 if not kind and not constraints: 6082 return this 6083 6084 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6085 6086 def _parse_auto_increment( 6087 self, 6088 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6089 start = None 6090 increment = None 6091 order = None 6092 6093 if self._match(TokenType.L_PAREN, advance=False): 6094 args = self._parse_wrapped_csv(self._parse_bitwise) 6095 start = seq_get(args, 0) 6096 increment = seq_get(args, 1) 6097 elif self._match_text_seq("START"): 6098 start = self._parse_bitwise() 6099 self._match_text_seq("INCREMENT") 6100 increment = self._parse_bitwise() 6101 if self._match_text_seq("ORDER"): 6102 order = True 6103 elif self._match_text_seq("NOORDER"): 6104 order = False 6105 6106 if start and increment: 6107 return exp.GeneratedAsIdentityColumnConstraint( 6108 start=start, increment=increment, this=False, order=order 6109 ) 6110 6111 return exp.AutoIncrementColumnConstraint() 6112 6113 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6114 if not self._match_text_seq("REFRESH"): 6115 self._retreat(self._index - 1) 6116 return None 6117 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6118 6119 def _parse_compress(self) -> exp.CompressColumnConstraint: 6120 if 
self._match(TokenType.L_PAREN, advance=False): 6121 return self.expression( 6122 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6123 ) 6124 6125 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6126 6127 def _parse_generated_as_identity( 6128 self, 6129 ) -> ( 6130 exp.GeneratedAsIdentityColumnConstraint 6131 | exp.ComputedColumnConstraint 6132 | exp.GeneratedAsRowColumnConstraint 6133 ): 6134 if self._match_text_seq("BY", "DEFAULT"): 6135 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6136 this = self.expression( 6137 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6138 ) 6139 else: 6140 self._match_text_seq("ALWAYS") 6141 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6142 6143 self._match(TokenType.ALIAS) 6144 6145 if self._match_text_seq("ROW"): 6146 start = self._match_text_seq("START") 6147 if not start: 6148 self._match(TokenType.END) 6149 hidden = self._match_text_seq("HIDDEN") 6150 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6151 6152 identity = self._match_text_seq("IDENTITY") 6153 6154 if self._match(TokenType.L_PAREN): 6155 if self._match(TokenType.START_WITH): 6156 this.set("start", self._parse_bitwise()) 6157 if self._match_text_seq("INCREMENT", "BY"): 6158 this.set("increment", self._parse_bitwise()) 6159 if self._match_text_seq("MINVALUE"): 6160 this.set("minvalue", self._parse_bitwise()) 6161 if self._match_text_seq("MAXVALUE"): 6162 this.set("maxvalue", self._parse_bitwise()) 6163 6164 if self._match_text_seq("CYCLE"): 6165 this.set("cycle", True) 6166 elif self._match_text_seq("NO", "CYCLE"): 6167 this.set("cycle", False) 6168 6169 if not identity: 6170 this.set("expression", self._parse_range()) 6171 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6172 args = self._parse_csv(self._parse_bitwise) 6173 this.set("start", seq_get(args, 0)) 6174 this.set("increment", seq_get(args, 1)) 6175 6176 self._match_r_paren() 6177 6178 return this 6179 6180 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6181 self._match_text_seq("LENGTH") 6182 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6183 6184 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6185 if self._match_text_seq("NULL"): 6186 return self.expression(exp.NotNullColumnConstraint) 6187 if self._match_text_seq("CASESPECIFIC"): 6188 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6189 if self._match_text_seq("FOR", "REPLICATION"): 6190 return self.expression(exp.NotForReplicationColumnConstraint) 6191 6192 # Unconsume the `NOT` token 6193 self._retreat(self._index - 1) 6194 return None 6195 6196 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6197 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6198 6199 procedure_option_follows = ( 6200 self._match(TokenType.WITH, advance=False) 6201 and self._next 6202 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6203 ) 6204 6205 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6206 return self.expression( 6207 exp.ColumnConstraint, 6208 this=this, 6209 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6210 ) 6211 6212 return this 6213 6214 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6215 if not self._match(TokenType.CONSTRAINT): 6216 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6217 6218 return self.expression( 6219 exp.Constraint, 6220 this=self._parse_id_var(), 6221 expressions=self._parse_unnamed_constraints(), 6222 ) 6223 6224 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6225 constraints = [] 6226 while True: 6227 constraint = self._parse_unnamed_constraint() or self._parse_function() 6228 if not constraint: 6229 break 6230 constraints.append(constraint) 6231 6232 return constraints 6233 6234 def _parse_unnamed_constraint( 6235 self, constraints: t.Optional[t.Collection[str]] = None 6236 ) -> t.Optional[exp.Expression]: 6237 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6238 constraints or self.CONSTRAINT_PARSERS 6239 ): 6240 return None 6241 6242 constraint = self._prev.text.upper() 6243 if constraint not in self.CONSTRAINT_PARSERS: 6244 self.raise_error(f"No parser found for schema constraint {constraint}.") 6245 6246 return self.CONSTRAINT_PARSERS[constraint](self) 6247 6248 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6249 return self._parse_id_var(any_token=False) 6250 6251 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6252 self._match_texts(("KEY", "INDEX")) 6253 return self.expression( 6254 exp.UniqueColumnConstraint, 6255 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6256 this=self._parse_schema(self._parse_unique_key()), 6257 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6258 on_conflict=self._parse_on_conflict(), 6259 options=self._parse_key_constraint_options(), 6260 ) 6261 6262 def _parse_key_constraint_options(self) -> t.List[str]: 6263 options = [] 6264 while True: 6265 if not self._curr: 6266 break 6267 6268 if self._match(TokenType.ON): 6269 action = None 6270 on = self._advance_any() and self._prev.text 6271 6272 if self._match_text_seq("NO", "ACTION"): 6273 action = "NO ACTION" 6274 elif self._match_text_seq("CASCADE"): 6275 action = "CASCADE" 6276 elif self._match_text_seq("RESTRICT"): 6277 action = "RESTRICT" 6278 elif self._match_pair(TokenType.SET, TokenType.NULL): 6279 action = "SET NULL" 6280 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6281 action = "SET DEFAULT" 6282 else: 6283 self.raise_error("Invalid key constraint") 6284 6285 options.append(f"ON {on} {action}") 6286 else: 6287 var = self._parse_var_from_options( 6288 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6289 ) 6290 if not var: 6291 break 6292 options.append(var.name) 6293 6294 return options 6295 6296 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6297 if match and not self._match(TokenType.REFERENCES): 6298 return None 6299 6300 expressions = None 6301 this = self._parse_table(schema=True) 6302 options = self._parse_key_constraint_options() 6303 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6304 6305 def _parse_foreign_key(self) -> exp.ForeignKey: 6306 expressions = ( 6307 self._parse_wrapped_id_vars() 6308 if not self._match(TokenType.REFERENCES, advance=False) 6309 else None 6310 ) 6311 reference = self._parse_references() 6312 on_options = {} 6313 6314 while self._match(TokenType.ON): 6315 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6316 self.raise_error("Expected DELETE or UPDATE") 6317 6318 kind = self._prev.text.lower() 6319 6320 if self._match_text_seq("NO", "ACTION"): 6321 action = "NO ACTION" 6322 elif self._match(TokenType.SET): 6323 
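# SET here must be followed by NULL or DEFAULT, as in the (illustrative)
# constraint "FOREIGN KEY (a) REFERENCES t (b) ON DELETE SET NULL", which
# is recorded as the action "SET NULL" just below.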
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6324 action = "SET " + self._prev.text.upper() 6325 else: 6326 self._advance() 6327 action = self._prev.text.upper() 6328 6329 on_options[kind] = action 6330 6331 return self.expression( 6332 exp.ForeignKey, 6333 expressions=expressions, 6334 reference=reference, 6335 options=self._parse_key_constraint_options(), 6336 **on_options, # type: ignore 6337 ) 6338 6339 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6340 return self._parse_ordered() or self._parse_field() 6341 6342 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6343 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6344 self._retreat(self._index - 1) 6345 return None 6346 6347 id_vars = self._parse_wrapped_id_vars() 6348 return self.expression( 6349 exp.PeriodForSystemTimeConstraint, 6350 this=seq_get(id_vars, 0), 6351 expression=seq_get(id_vars, 1), 6352 ) 6353 6354 def _parse_primary_key( 6355 self, wrapped_optional: bool = False, in_props: bool = False 6356 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6357 desc = ( 6358 self._match_set((TokenType.ASC, TokenType.DESC)) 6359 and self._prev.token_type == TokenType.DESC 6360 ) 6361 6362 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6363 return self.expression( 6364 exp.PrimaryKeyColumnConstraint, 6365 desc=desc, 6366 options=self._parse_key_constraint_options(), 6367 ) 6368 6369 expressions = self._parse_wrapped_csv( 6370 self._parse_primary_key_part, optional=wrapped_optional 6371 ) 6372 6373 return self.expression( 6374 exp.PrimaryKey, 6375 expressions=expressions, 6376 include=self._parse_index_params(), 6377 options=self._parse_key_constraint_options(), 6378 ) 6379 6380 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6381 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6382 6383 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6384 """ 6385 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 6386 expression type, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` node, exactly the 6387 same as we do for `DATE('yyyy-mm-dd')`.
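The other ODBC escape forms follow the same pattern via the ODBC_DATETIME_LITERALS
mapping, e.g. (illustrative) `{t'hh:mm:ss'}` maps to a `Time` node and
`{ts'yyyy-mm-dd hh:mm:ss'}` to a `Timestamp` node.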
6388 6389 Reference: 6390 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6391 """ 6392 self._match(TokenType.VAR) 6393 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6394 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6395 if not self._match(TokenType.R_BRACE): 6396 self.raise_error("Expected }") 6397 return expression 6398 6399 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6400 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6401 return this 6402 6403 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6404 map_token = seq_get(self._tokens, self._index - 2) 6405 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6406 else: 6407 parse_map = False 6408 6409 bracket_kind = self._prev.token_type 6410 if ( 6411 bracket_kind == TokenType.L_BRACE 6412 and self._curr 6413 and self._curr.token_type == TokenType.VAR 6414 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6415 ): 6416 return self._parse_odbc_datetime_literal() 6417 6418 expressions = self._parse_csv( 6419 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6420 ) 6421 6422 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6423 self.raise_error("Expected ]") 6424 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6425 self.raise_error("Expected }") 6426 6427 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6428 if bracket_kind == TokenType.L_BRACE: 6429 this = self.expression( 6430 exp.Struct, 6431 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6432 ) 6433 elif not this: 6434 this = build_array_constructor( 6435 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6436 ) 6437 else: 6438 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6439 if constructor_type: 6440 return build_array_constructor( 6441 constructor_type, 6442 args=expressions, 6443 bracket_kind=bracket_kind, 6444 dialect=self.dialect, 6445 ) 6446 6447 expressions = apply_index_offset( 6448 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6449 ) 6450 this = self.expression( 6451 exp.Bracket, 6452 this=this, 6453 expressions=expressions, 6454 comments=this.pop_comments(), 6455 ) 6456 6457 self._add_comments(this) 6458 return self._parse_bracket(this) 6459 6460 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6461 if self._match(TokenType.COLON): 6462 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6463 return this 6464 6465 def _parse_case(self) -> t.Optional[exp.Expression]: 6466 ifs = [] 6467 default = None 6468 6469 comments = self._prev_comments 6470 expression = self._parse_assignment() 6471 6472 while self._match(TokenType.WHEN): 6473 this = self._parse_assignment() 6474 self._match(TokenType.THEN) 6475 then = self._parse_assignment() 6476 ifs.append(self.expression(exp.If, this=this, true=then)) 6477 6478 if self._match(TokenType.ELSE): 6479 default = self._parse_assignment() 6480 6481 if not self._match(TokenType.END): 6482 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6483 default = exp.column("interval") 6484 else: 6485 self.raise_error("Expected END after CASE", self._prev) 6486 6487 return self.expression( 6488 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6489 ) 6490 6491 def _parse_if(self) -> t.Optional[exp.Expression]: 6492 if self._match(TokenType.L_PAREN): 6493 args = self._parse_csv( 6494 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6495 ) 6496 this = self.validate_expression(exp.If.from_arg_list(args), args) 6497 self._match_r_paren() 6498 else: 6499 index = self._index - 1 6500 6501 if self.NO_PAREN_IF_COMMANDS and index == 0: 6502 return self._parse_as_command(self._prev) 6503 6504 condition = self._parse_assignment() 6505 6506 if not condition: 6507 self._retreat(index) 6508 return None 6509 6510 self._match(TokenType.THEN) 6511 true = self._parse_assignment() 6512 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6513 self._match(TokenType.END) 6514 this = self.expression(exp.If, this=condition, true=true, false=false) 6515 6516 return this 6517 6518 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6519 if not self._match_text_seq("VALUE", "FOR"): 6520 self._retreat(self._index - 1) 6521 return None 6522 6523 return self.expression( 6524 exp.NextValueFor, 6525 this=self._parse_column(), 6526 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6527 ) 6528 6529 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6530 this = self._parse_function() or self._parse_var_or_string(upper=True) 6531 6532 if self._match(TokenType.FROM): 6533 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6534 6535 if not self._match(TokenType.COMMA): 6536 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6537 6538 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6539 6540 def _parse_gap_fill(self) -> exp.GapFill: 6541 self._match(TokenType.TABLE) 6542 this = self._parse_table() 6543 6544 self._match(TokenType.COMMA) 6545 args = [this, *self._parse_csv(self._parse_lambda)] 6546 6547 gap_fill = exp.GapFill.from_arg_list(args) 6548 return self.validate_expression(gap_fill, args) 6549 6550 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6551 this = self._parse_assignment() 6552 6553 if not self._match(TokenType.ALIAS): 6554 if self._match(TokenType.COMMA): 6555 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6556 6557 self.raise_error("Expected AS after CAST") 6558 6559 fmt = None 6560 to = self._parse_types() 6561 6562 default = self._match(TokenType.DEFAULT) 6563 if default: 6564 default = self._parse_bitwise() 6565 self._match_text_seq("ON", "CONVERSION", "ERROR") 6566 6567 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6568 fmt_string = self._parse_string() 6569 fmt = self._parse_at_time_zone(fmt_string) 6570 6571 if not to: 6572 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6573 if to.this in exp.DataType.TEMPORAL_TYPES: 6574 this = self.expression( 6575 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6576 this=this, 6577 format=exp.Literal.string( 6578 format_time( 6579 fmt_string.this if fmt_string else "", 6580 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6581 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6582 ) 6583 ), 6584 safe=safe, 6585 ) 6586 6587 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6588 this.set("zone", fmt.args["zone"]) 6589 return this 6590 elif not to: 6591 self.raise_error("Expected TYPE after CAST") 6592 elif isinstance(to, exp.Identifier): 6593 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6594 elif to.this == exp.DataType.Type.CHAR: 6595 if self._match(TokenType.CHARACTER_SET): 6596 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6597 6598 return self.build_cast( 6599 strict=strict, 6600 this=this, 6601 to=to, 6602 format=fmt, 6603 safe=safe, 6604 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6605 default=default, 6606 ) 6607 6608 def _parse_string_agg(self) -> exp.GroupConcat: 6609 if self._match(TokenType.DISTINCT): 6610 args: t.List[t.Optional[exp.Expression]] = [ 6611 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6612 ] 6613 if self._match(TokenType.COMMA): 6614 args.extend(self._parse_csv(self._parse_assignment)) 6615 else: 6616 args = self._parse_csv(self._parse_assignment) # type: ignore 6617 6618 if self._match_text_seq("ON", "OVERFLOW"): 6619 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6620 if self._match_text_seq("ERROR"): 6621 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6622 else: 6623 self._match_text_seq("TRUNCATE") 6624 on_overflow = self.expression( 6625 exp.OverflowTruncateBehavior, 6626 this=self._parse_string(), 6627 with_count=( 6628 self._match_text_seq("WITH", "COUNT") 6629 or not self._match_text_seq("WITHOUT", "COUNT") 6630 ), 6631 ) 6632 else: 6633 on_overflow = None 6634 6635 index = self._index 6636 if not self._match(TokenType.R_PAREN) and args: 6637 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6638 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6639 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6640 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6641 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6642 6643 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6644 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6645 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
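# Illustrative sketch (added for clarity, not part of the original source; uses
# only the public sqlglot API, printed ASTs elided): both surface syntaxes land
# in the same exp.GroupConcat shape, with the ordering carried on `this`:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres")
#   >>> sqlglot.parse_one("SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t", read="trino")
# which is what lets the WITHIN GROUP form transpile cleanly to the
# GROUP_CONCAT-style dialects mentioned above.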
6646 if not self._match_text_seq("WITHIN", "GROUP"): 6647 self._retreat(index) 6648 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6649 6650 # The corresponding match_r_paren will be called in parse_function (caller) 6651 self._match_l_paren() 6652 6653 return self.expression( 6654 exp.GroupConcat, 6655 this=self._parse_order(this=seq_get(args, 0)), 6656 separator=seq_get(args, 1), 6657 on_overflow=on_overflow, 6658 ) 6659 6660 def _parse_convert( 6661 self, strict: bool, safe: t.Optional[bool] = None 6662 ) -> t.Optional[exp.Expression]: 6663 this = self._parse_bitwise() 6664 6665 if self._match(TokenType.USING): 6666 to: t.Optional[exp.Expression] = self.expression( 6667 exp.CharacterSet, this=self._parse_var() 6668 ) 6669 elif self._match(TokenType.COMMA): 6670 to = self._parse_types() 6671 else: 6672 to = None 6673 6674 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6675 6676 def _parse_xml_table(self) -> exp.XMLTable: 6677 namespaces = None 6678 passing = None 6679 columns = None 6680 6681 if self._match_text_seq("XMLNAMESPACES", "("): 6682 namespaces = self._parse_xml_namespace() 6683 self._match_text_seq(")", ",") 6684 6685 this = self._parse_string() 6686 6687 if self._match_text_seq("PASSING"): 6688 # The BY VALUE keywords are optional and are provided for semantic clarity 6689 self._match_text_seq("BY", "VALUE") 6690 passing = self._parse_csv(self._parse_column) 6691 6692 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6693 6694 if self._match_text_seq("COLUMNS"): 6695 columns = self._parse_csv(self._parse_field_def) 6696 6697 return self.expression( 6698 exp.XMLTable, 6699 this=this, 6700 namespaces=namespaces, 6701 passing=passing, 6702 columns=columns, 6703 by_ref=by_ref, 6704 ) 6705 6706 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6707 namespaces = [] 6708 6709 while True: 6710 if self._match(TokenType.DEFAULT): 6711 uri = self._parse_string() 6712 else: 6713 uri = self._parse_alias(self._parse_string()) 6714 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6715 if not self._match(TokenType.COMMA): 6716 break 6717 6718 return namespaces 6719 6720 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6721 args = self._parse_csv(self._parse_assignment) 6722 6723 if len(args) < 3: 6724 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6725 6726 return self.expression(exp.DecodeCase, expressions=args) 6727 6728 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6729 self._match_text_seq("KEY") 6730 key = self._parse_column() 6731 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6732 self._match_text_seq("VALUE") 6733 value = self._parse_bitwise() 6734 6735 if not key and not value: 6736 return None 6737 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6738 6739 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6740 if not this or not self._match_text_seq("FORMAT", "JSON"): 6741 return this 6742 6743 return self.expression(exp.FormatJson, this=this) 6744 6745 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6746 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6747 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6748 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6749 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6750 else: 6751 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6752 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6753 6754 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6755 6756 if not empty and not error and not null: 6757 return None 6758 6759 return self.expression( 6760 exp.OnCondition, 6761 empty=empty, 6762 error=error, 6763 null=null, 6764 ) 6765 6766 def _parse_on_handling( 6767 self, on: str, *values: str 6768 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6769 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6770 for value in values: 6771 if self._match_text_seq(value, "ON", on): 6772 return f"{value} ON {on}" 6773 6774 index = self._index 6775 if self._match(TokenType.DEFAULT): 6776 default_value = self._parse_bitwise() 6777 if self._match_text_seq("ON", on): 6778 return default_value 6779 6780 self._retreat(index) 6781 6782 return None 6783 6784 @t.overload 6785 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6786 6787 @t.overload 6788 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6789 6790 def _parse_json_object(self, agg=False): 6791 star = self._parse_star() 6792 expressions = ( 6793 [star] 6794 if star 6795 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6796 ) 6797 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6798 6799 unique_keys = None 6800 if self._match_text_seq("WITH", "UNIQUE"): 6801 unique_keys = True 6802 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6803 unique_keys = False 6804 6805 self._match_text_seq("KEYS") 6806 6807 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6808 self._parse_type() 6809 ) 6810 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6811 6812 return self.expression( 6813 exp.JSONObjectAgg if agg else exp.JSONObject, 6814 expressions=expressions, 6815 null_handling=null_handling, 6816 unique_keys=unique_keys, 6817 return_type=return_type, 6818 encoding=encoding, 6819 ) 6820 6821 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6822 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6823 if not self._match_text_seq("NESTED"): 6824 this = self._parse_id_var() 6825 kind = self._parse_types(allow_identifiers=False) 6826 nested = None 6827 else: 6828 this = None 6829 kind = None 6830 nested = True 6831 6832 path = self._match_text_seq("PATH") and self._parse_string() 6833 nested_schema = nested and self._parse_json_schema() 6834 6835 return self.expression( 6836 exp.JSONColumnDef, 6837 this=this, 6838 kind=kind, 6839 path=path, 6840 nested_schema=nested_schema, 6841 ) 6842 6843 def _parse_json_schema(self) -> exp.JSONSchema: 6844 self._match_text_seq("COLUMNS") 6845 return self.expression( 6846 exp.JSONSchema, 6847 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6848 ) 6849 6850 def _parse_json_table(self) -> exp.JSONTable: 6851 this = self._parse_format_json(self._parse_bitwise()) 6852 path = self._match(TokenType.COMMA) and self._parse_string() 6853 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6854 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6855 schema = 
self._parse_json_schema() 6856 6857 return exp.JSONTable( 6858 this=this, 6859 schema=schema, 6860 path=path, 6861 error_handling=error_handling, 6862 empty_handling=empty_handling, 6863 ) 6864 6865 def _parse_match_against(self) -> exp.MatchAgainst: 6866 expressions = self._parse_csv(self._parse_column) 6867 6868 self._match_text_seq(")", "AGAINST", "(") 6869 6870 this = self._parse_string() 6871 6872 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6873 modifier = "IN NATURAL LANGUAGE MODE" 6874 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6875 modifier = f"{modifier} WITH QUERY EXPANSION" 6876 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6877 modifier = "IN BOOLEAN MODE" 6878 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6879 modifier = "WITH QUERY EXPANSION" 6880 else: 6881 modifier = None 6882 6883 return self.expression( 6884 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6885 ) 6886 6887 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6888 def _parse_open_json(self) -> exp.OpenJSON: 6889 this = self._parse_bitwise() 6890 path = self._match(TokenType.COMMA) and self._parse_string() 6891 6892 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6893 this = self._parse_field(any_token=True) 6894 kind = self._parse_types() 6895 path = self._parse_string() 6896 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6897 6898 return self.expression( 6899 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6900 ) 6901 6902 expressions = None 6903 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6904 self._match_l_paren() 6905 expressions = self._parse_csv(_parse_open_json_column_def) 6906 6907 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6908 6909 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6910 args = self._parse_csv(self._parse_bitwise) 6911 6912 if self._match(TokenType.IN): 6913 return self.expression( 6914 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6915 ) 6916 6917 if haystack_first: 6918 haystack = seq_get(args, 0) 6919 needle = seq_get(args, 1) 6920 else: 6921 haystack = seq_get(args, 1) 6922 needle = seq_get(args, 0) 6923 6924 return self.expression( 6925 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6926 ) 6927 6928 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6929 args = self._parse_csv(self._parse_table) 6930 return exp.JoinHint(this=func_name.upper(), expressions=args) 6931 6932 def _parse_substring(self) -> exp.Substring: 6933 # Postgres supports the form: substring(string [from int] [for int]) 6934 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6935 6936 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6937 6938 if self._match(TokenType.FROM): 6939 args.append(self._parse_bitwise()) 6940 if self._match(TokenType.FOR): 6941 if len(args) == 1: 6942 args.append(exp.Literal.number(1)) 6943 args.append(self._parse_bitwise()) 6944 6945 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6946 6947 def _parse_trim(self) -> exp.Trim: 6948 # https://www.w3resource.com/sql/character-functions/trim.php 6949 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6950 6951 position = None 6952 collation = None 6953 expression = None 6954 6955 if self._match_texts(self.TRIM_TYPES): 6956 position = 
self._prev.text.upper() 6957 6958 this = self._parse_bitwise() 6959 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6960 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6961 expression = self._parse_bitwise() 6962 6963 if invert_order: 6964 this, expression = expression, this 6965 6966 if self._match(TokenType.COLLATE): 6967 collation = self._parse_bitwise() 6968 6969 return self.expression( 6970 exp.Trim, this=this, position=position, expression=expression, collation=collation 6971 ) 6972 6973 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6974 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6975 6976 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6977 return self._parse_window(self._parse_id_var(), alias=True) 6978 6979 def _parse_respect_or_ignore_nulls( 6980 self, this: t.Optional[exp.Expression] 6981 ) -> t.Optional[exp.Expression]: 6982 if self._match_text_seq("IGNORE", "NULLS"): 6983 return self.expression(exp.IgnoreNulls, this=this) 6984 if self._match_text_seq("RESPECT", "NULLS"): 6985 return self.expression(exp.RespectNulls, this=this) 6986 return this 6987 6988 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6989 if self._match(TokenType.HAVING): 6990 self._match_texts(("MAX", "MIN")) 6991 max = self._prev.text.upper() != "MIN" 6992 return self.expression( 6993 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6994 ) 6995 6996 return this 6997 6998 def _parse_window( 6999 self, this: t.Optional[exp.Expression], alias: bool = False 7000 ) -> t.Optional[exp.Expression]: 7001 func = this 7002 comments = func.comments if isinstance(func, exp.Expression) else None 7003 7004 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7005 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7006 if self._match_text_seq("WITHIN", "GROUP"): 7007 order = self._parse_wrapped(self._parse_order) 7008 this = self.expression(exp.WithinGroup, this=this, expression=order) 7009 7010 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7011 self._match(TokenType.WHERE) 7012 this = self.expression( 7013 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7014 ) 7015 self._match_r_paren() 7016 7017 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7018 # Some dialects choose to implement and some do not. 7019 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7020 7021 # There is some code above in _parse_lambda that handles 7022 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7023 7024 # The below changes handle 7025 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7026 7027 # Oracle allows both formats 7028 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7029 # and Snowflake chose to do the same for familiarity 7030 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7031 if isinstance(this, exp.AggFunc): 7032 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7033 7034 if ignore_respect and ignore_respect is not this: 7035 ignore_respect.replace(ignore_respect.this) 7036 this = self.expression(ignore_respect.__class__, this=this) 7037 7038 this = self._parse_respect_or_ignore_nulls(this) 7039 7040 # bigquery select from window x AS (partition by ...) 
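        # e.g. (illustrative) SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)
        # When called from _parse_named_window, `alias` is True and `this` is the window name being defined.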
7041 if alias: 7042 over = None 7043 self._match(TokenType.ALIAS) 7044 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7045 return this 7046 else: 7047 over = self._prev.text.upper() 7048 7049 if comments and isinstance(func, exp.Expression): 7050 func.pop_comments() 7051 7052 if not self._match(TokenType.L_PAREN): 7053 return self.expression( 7054 exp.Window, 7055 comments=comments, 7056 this=this, 7057 alias=self._parse_id_var(False), 7058 over=over, 7059 ) 7060 7061 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7062 7063 first = self._match(TokenType.FIRST) 7064 if self._match_text_seq("LAST"): 7065 first = False 7066 7067 partition, order = self._parse_partition_and_order() 7068 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7069 7070 if kind: 7071 self._match(TokenType.BETWEEN) 7072 start = self._parse_window_spec() 7073 self._match(TokenType.AND) 7074 end = self._parse_window_spec() 7075 exclude = ( 7076 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7077 if self._match_text_seq("EXCLUDE") 7078 else None 7079 ) 7080 7081 spec = self.expression( 7082 exp.WindowSpec, 7083 kind=kind, 7084 start=start["value"], 7085 start_side=start["side"], 7086 end=end["value"], 7087 end_side=end["side"], 7088 exclude=exclude, 7089 ) 7090 else: 7091 spec = None 7092 7093 self._match_r_paren() 7094 7095 window = self.expression( 7096 exp.Window, 7097 comments=comments, 7098 this=this, 7099 partition_by=partition, 7100 order=order, 7101 spec=spec, 7102 alias=window_alias, 7103 over=over, 7104 first=first, 7105 ) 7106 7107 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7108 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7109 return self._parse_window(window, alias=alias) 7110 7111 return window 7112 7113 def _parse_partition_and_order( 7114 self, 7115 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7116 return self._parse_partition_by(), self._parse_order() 7117 7118 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7119 self._match(TokenType.BETWEEN) 7120 7121 return { 7122 "value": ( 7123 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7124 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7125 or self._parse_bitwise() 7126 ), 7127 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7128 } 7129 7130 def _parse_alias( 7131 self, this: t.Optional[exp.Expression], explicit: bool = False 7132 ) -> t.Optional[exp.Expression]: 7133 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7134 # so this section tries to parse the clause version and if it fails, it treats the token 7135 # as an identifier (alias) 7136 if self._can_parse_limit_or_offset(): 7137 return this 7138 7139 any_token = self._match(TokenType.ALIAS) 7140 comments = self._prev_comments or [] 7141 7142 if explicit and not any_token: 7143 return this 7144 7145 if self._match(TokenType.L_PAREN): 7146 aliases = self.expression( 7147 exp.Aliases, 7148 comments=comments, 7149 this=this, 7150 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7151 ) 7152 self._match_r_paren(aliases) 7153 return aliases 7154 7155 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7156 self.STRING_ALIASES and self._parse_string_as_identifier() 7157 ) 7158 7159 if alias: 7160 comments.extend(alias.pop_comments()) 7161 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7162 column = this.this 7163 7164 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7165 if not this.comments and column and column.comments: 7166 this.comments = column.pop_comments() 7167 7168 return this 7169 7170 def _parse_id_var( 7171 self, 7172 any_token: bool = True, 7173 tokens: t.Optional[t.Collection[TokenType]] = None, 7174 ) -> t.Optional[exp.Expression]: 7175 expression = self._parse_identifier() 7176 if not expression and ( 7177 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7178 ): 7179 quoted = self._prev.token_type == TokenType.STRING 7180 expression = self._identifier_expression(quoted=quoted) 7181 7182 return expression 7183 7184 def _parse_string(self) -> t.Optional[exp.Expression]: 7185 if self._match_set(self.STRING_PARSERS): 7186 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7187 return self._parse_placeholder() 7188 7189 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7190 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7191 if output: 7192 output.update_positions(self._prev) 7193 return output 7194 7195 def _parse_number(self) -> t.Optional[exp.Expression]: 7196 if self._match_set(self.NUMERIC_PARSERS): 7197 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7198 return self._parse_placeholder() 7199 7200 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7201 if self._match(TokenType.IDENTIFIER): 7202 return self._identifier_expression(quoted=True) 7203 return self._parse_placeholder() 7204 7205 def _parse_var( 7206 self, 7207 any_token: bool = False, 7208 tokens: t.Optional[t.Collection[TokenType]] = None, 7209 upper: bool = False, 7210 ) -> t.Optional[exp.Expression]: 7211 if ( 7212 (any_token and self._advance_any()) 7213 or self._match(TokenType.VAR) 7214 or (self._match_set(tokens) if tokens else False) 7215 ): 7216 return self.expression( 7217 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7218 ) 7219 return self._parse_placeholder() 7220 7221 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7222 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7223 self._advance() 7224 return self._prev 7225 return None 7226 7227 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7228 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7229 7230 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7231 return self._parse_primary() or self._parse_var(any_token=True) 7232 7233 def _parse_null(self) -> t.Optional[exp.Expression]: 7234 if self._match_set(self.NULL_TOKENS): 7235 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7236 return self._parse_placeholder() 7237 7238 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7239 if self._match(TokenType.TRUE): 7240 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7241 if self._match(TokenType.FALSE): 7242 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7243 return self._parse_placeholder() 7244 7245 def _parse_star(self) -> t.Optional[exp.Expression]: 7246 if self._match(TokenType.STAR): 7247 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7248 return self._parse_placeholder() 7249 7250 def _parse_parameter(self) -> exp.Parameter: 7251 this = self._parse_identifier() or self._parse_primary_or_var() 7252 return 
self.expression(exp.Parameter, this=this) 7253 7254 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7255 if self._match_set(self.PLACEHOLDER_PARSERS): 7256 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7257 if placeholder: 7258 return placeholder 7259 self._advance(-1) 7260 return None 7261 7262 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7263 if not self._match_texts(keywords): 7264 return None 7265 if self._match(TokenType.L_PAREN, advance=False): 7266 return self._parse_wrapped_csv(self._parse_expression) 7267 7268 expression = self._parse_expression() 7269 return [expression] if expression else None 7270 7271 def _parse_csv( 7272 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7273 ) -> t.List[exp.Expression]: 7274 parse_result = parse_method() 7275 items = [parse_result] if parse_result is not None else [] 7276 7277 while self._match(sep): 7278 self._add_comments(parse_result) 7279 parse_result = parse_method() 7280 if parse_result is not None: 7281 items.append(parse_result) 7282 7283 return items 7284 7285 def _parse_tokens( 7286 self, parse_method: t.Callable, expressions: t.Dict 7287 ) -> t.Optional[exp.Expression]: 7288 this = parse_method() 7289 7290 while self._match_set(expressions): 7291 this = self.expression( 7292 expressions[self._prev.token_type], 7293 this=this, 7294 comments=self._prev_comments, 7295 expression=parse_method(), 7296 ) 7297 7298 return this 7299 7300 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7301 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7302 7303 def _parse_wrapped_csv( 7304 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7305 ) -> t.List[exp.Expression]: 7306 return self._parse_wrapped( 7307 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7308 ) 7309 7310 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7311 wrapped = self._match(TokenType.L_PAREN) 7312 if not wrapped and not optional: 7313 self.raise_error("Expecting (") 7314 parse_result = parse_method() 7315 if wrapped: 7316 self._match_r_paren() 7317 return parse_result 7318 7319 def _parse_expressions(self) -> t.List[exp.Expression]: 7320 return self._parse_csv(self._parse_expression) 7321 7322 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7323 return ( 7324 self._parse_set_operations( 7325 self._parse_alias(self._parse_assignment(), explicit=True) 7326 if alias 7327 else self._parse_assignment() 7328 ) 7329 or self._parse_select() 7330 ) 7331 7332 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7333 return self._parse_query_modifiers( 7334 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7335 ) 7336 7337 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7338 this = None 7339 if self._match_texts(self.TRANSACTION_KIND): 7340 this = self._prev.text 7341 7342 self._match_texts(("TRANSACTION", "WORK")) 7343 7344 modes = [] 7345 while True: 7346 mode = [] 7347 while self._match(TokenType.VAR): 7348 mode.append(self._prev.text) 7349 7350 if mode: 7351 modes.append(" ".join(mode)) 7352 if not self._match(TokenType.COMMA): 7353 break 7354 7355 return self.expression(exp.Transaction, this=this, modes=modes) 7356 7357 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7358 chain = None 7359 savepoint = None 7360 is_rollback = 
self._prev.token_type == TokenType.ROLLBACK 7361 7362 self._match_texts(("TRANSACTION", "WORK")) 7363 7364 if self._match_text_seq("TO"): 7365 self._match_text_seq("SAVEPOINT") 7366 savepoint = self._parse_id_var() 7367 7368 if self._match(TokenType.AND): 7369 chain = not self._match_text_seq("NO") 7370 self._match_text_seq("CHAIN") 7371 7372 if is_rollback: 7373 return self.expression(exp.Rollback, savepoint=savepoint) 7374 7375 return self.expression(exp.Commit, chain=chain) 7376 7377 def _parse_refresh(self) -> exp.Refresh: 7378 self._match(TokenType.TABLE) 7379 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7380 7381 def _parse_column_def_with_exists(self): 7382 start = self._index 7383 self._match(TokenType.COLUMN) 7384 7385 exists_column = self._parse_exists(not_=True) 7386 expression = self._parse_field_def() 7387 7388 if not isinstance(expression, exp.ColumnDef): 7389 self._retreat(start) 7390 return None 7391 7392 expression.set("exists", exists_column) 7393 7394 return expression 7395 7396 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7397 if not self._prev.text.upper() == "ADD": 7398 return None 7399 7400 expression = self._parse_column_def_with_exists() 7401 if not expression: 7402 return None 7403 7404 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7405 if self._match_texts(("FIRST", "AFTER")): 7406 position = self._prev.text 7407 column_position = self.expression( 7408 exp.ColumnPosition, this=self._parse_column(), position=position 7409 ) 7410 expression.set("position", column_position) 7411 7412 return expression 7413 7414 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7415 drop = self._match(TokenType.DROP) and self._parse_drop() 7416 if drop and not isinstance(drop, exp.Command): 7417 drop.set("kind", drop.args.get("kind", "COLUMN")) 7418 return drop 7419 7420 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7421 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7422 return self.expression( 7423 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7424 ) 7425 7426 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7427 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7428 self._match_text_seq("ADD") 7429 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7430 return self.expression( 7431 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7432 ) 7433 7434 column_def = self._parse_add_column() 7435 if isinstance(column_def, exp.ColumnDef): 7436 return column_def 7437 7438 exists = self._parse_exists(not_=True) 7439 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7440 return self.expression( 7441 exp.AddPartition, 7442 exists=exists, 7443 this=self._parse_field(any_token=True), 7444 location=self._match_text_seq("LOCATION", advance=False) 7445 and self._parse_property(), 7446 ) 7447 7448 return None 7449 7450 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7451 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7452 or self._match_text_seq("COLUMNS") 7453 ): 7454 schema = self._parse_schema() 7455 7456 return ( 7457 ensure_list(schema) 7458 if schema 7459 else self._parse_csv(self._parse_column_def_with_exists) 7460 ) 7461 7462 return self._parse_csv(_parse_add_alteration) 7463 7464 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7465 
if self._match_texts(self.ALTER_ALTER_PARSERS): 7466 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7467 7468 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7469 # keyword after ALTER we default to parsing this statement 7470 self._match(TokenType.COLUMN) 7471 column = self._parse_field(any_token=True) 7472 7473 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7474 return self.expression(exp.AlterColumn, this=column, drop=True) 7475 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7476 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7477 if self._match(TokenType.COMMENT): 7478 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7479 if self._match_text_seq("DROP", "NOT", "NULL"): 7480 return self.expression( 7481 exp.AlterColumn, 7482 this=column, 7483 drop=True, 7484 allow_null=True, 7485 ) 7486 if self._match_text_seq("SET", "NOT", "NULL"): 7487 return self.expression( 7488 exp.AlterColumn, 7489 this=column, 7490 allow_null=False, 7491 ) 7492 7493 if self._match_text_seq("SET", "VISIBLE"): 7494 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7495 if self._match_text_seq("SET", "INVISIBLE"): 7496 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7497 7498 self._match_text_seq("SET", "DATA") 7499 self._match_text_seq("TYPE") 7500 return self.expression( 7501 exp.AlterColumn, 7502 this=column, 7503 dtype=self._parse_types(), 7504 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7505 using=self._match(TokenType.USING) and self._parse_assignment(), 7506 ) 7507 7508 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7509 if self._match_texts(("ALL", "EVEN", "AUTO")): 7510 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7511 7512 self._match_text_seq("KEY", "DISTKEY") 7513 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7514 7515 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7516 if compound: 7517 self._match_text_seq("SORTKEY") 7518 7519 if self._match(TokenType.L_PAREN, advance=False): 7520 return self.expression( 7521 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7522 ) 7523 7524 self._match_texts(("AUTO", "NONE")) 7525 return self.expression( 7526 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7527 ) 7528 7529 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7530 index = self._index - 1 7531 7532 partition_exists = self._parse_exists() 7533 if self._match(TokenType.PARTITION, advance=False): 7534 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7535 7536 self._retreat(index) 7537 return self._parse_csv(self._parse_drop_column) 7538 7539 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7540 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7541 exists = self._parse_exists() 7542 old_column = self._parse_column() 7543 to = self._match_text_seq("TO") 7544 new_column = self._parse_column() 7545 7546 if old_column is None or to is None or new_column is None: 7547 return None 7548 7549 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7550 7551 self._match_text_seq("TO") 7552 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7553 7554 def _parse_alter_table_set(self) -> exp.AlterSet: 
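        # Parses the option that follows ALTER TABLE ... SET, e.g. (illustrative)
        #   SET LOCATION 's3://bucket/path', SET TABLESPACE ts, SET FILE FORMAT ..., SET TAG k = 'v'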
7555 alter_set = self.expression(exp.AlterSet) 7556 7557 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7558 "TABLE", "PROPERTIES" 7559 ): 7560 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7561 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7562 alter_set.set("expressions", [self._parse_assignment()]) 7563 elif self._match_texts(("LOGGED", "UNLOGGED")): 7564 alter_set.set("option", exp.var(self._prev.text.upper())) 7565 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7566 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7567 elif self._match_text_seq("LOCATION"): 7568 alter_set.set("location", self._parse_field()) 7569 elif self._match_text_seq("ACCESS", "METHOD"): 7570 alter_set.set("access_method", self._parse_field()) 7571 elif self._match_text_seq("TABLESPACE"): 7572 alter_set.set("tablespace", self._parse_field()) 7573 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7574 alter_set.set("file_format", [self._parse_field()]) 7575 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7576 alter_set.set("file_format", self._parse_wrapped_options()) 7577 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7578 alter_set.set("copy_options", self._parse_wrapped_options()) 7579 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7580 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7581 else: 7582 if self._match_text_seq("SERDE"): 7583 alter_set.set("serde", self._parse_field()) 7584 7585 properties = self._parse_wrapped(self._parse_properties, optional=True) 7586 alter_set.set("expressions", [properties]) 7587 7588 return alter_set 7589 7590 def _parse_alter_session(self) -> exp.AlterSession: 7591 """Parse ALTER SESSION SET/UNSET statements.""" 7592 if self._match(TokenType.SET): 7593 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7594 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7595 7596 self._match_text_seq("UNSET") 7597 expressions = self._parse_csv( 7598 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7599 ) 7600 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7601 7602 def _parse_alter(self) -> exp.Alter | exp.Command: 7603 start = self._prev 7604 7605 alter_token = self._match_set(self.ALTERABLES) and self._prev 7606 if not alter_token: 7607 return self._parse_as_command(start) 7608 7609 exists = self._parse_exists() 7610 only = self._match_text_seq("ONLY") 7611 7612 if alter_token.token_type == TokenType.SESSION: 7613 this = None 7614 check = None 7615 cluster = None 7616 else: 7617 this = self._parse_table(schema=True) 7618 check = self._match_text_seq("WITH", "CHECK") 7619 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7620 7621 if self._next: 7622 self._advance() 7623 7624 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7625 if parser: 7626 actions = ensure_list(parser(self)) 7627 not_valid = self._match_text_seq("NOT", "VALID") 7628 options = self._parse_csv(self._parse_property) 7629 7630 if not self._curr and actions: 7631 return self.expression( 7632 exp.Alter, 7633 this=this, 7634 kind=alter_token.text.upper(), 7635 exists=exists, 7636 actions=actions, 7637 only=only, 7638 options=options, 7639 cluster=cluster, 7640 not_valid=not_valid, 7641 check=check, 7642 ) 7643 7644 return self._parse_as_command(start) 7645 7646 def 
_parse_analyze(self) -> exp.Analyze | exp.Command: 7647 start = self._prev 7648 # https://duckdb.org/docs/sql/statements/analyze 7649 if not self._curr: 7650 return self.expression(exp.Analyze) 7651 7652 options = [] 7653 while self._match_texts(self.ANALYZE_STYLES): 7654 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7655 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7656 else: 7657 options.append(self._prev.text.upper()) 7658 7659 this: t.Optional[exp.Expression] = None 7660 inner_expression: t.Optional[exp.Expression] = None 7661 7662 kind = self._curr and self._curr.text.upper() 7663 7664 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7665 this = self._parse_table_parts() 7666 elif self._match_text_seq("TABLES"): 7667 if self._match_set((TokenType.FROM, TokenType.IN)): 7668 kind = f"{kind} {self._prev.text.upper()}" 7669 this = self._parse_table(schema=True, is_db_reference=True) 7670 elif self._match_text_seq("DATABASE"): 7671 this = self._parse_table(schema=True, is_db_reference=True) 7672 elif self._match_text_seq("CLUSTER"): 7673 this = self._parse_table() 7674 # Try matching inner expr keywords before fallback to parse table. 7675 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7676 kind = None 7677 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7678 else: 7679 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7680 kind = None 7681 this = self._parse_table_parts() 7682 7683 partition = self._try_parse(self._parse_partition) 7684 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7685 return self._parse_as_command(start) 7686 7687 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7688 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7689 "WITH", "ASYNC", "MODE" 7690 ): 7691 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7692 else: 7693 mode = None 7694 7695 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7696 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7697 7698 properties = self._parse_properties() 7699 return self.expression( 7700 exp.Analyze, 7701 kind=kind, 7702 this=this, 7703 mode=mode, 7704 partition=partition, 7705 properties=properties, 7706 expression=inner_expression, 7707 options=options, 7708 ) 7709 7710 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7711 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7712 this = None 7713 kind = self._prev.text.upper() 7714 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7715 expressions = [] 7716 7717 if not self._match_text_seq("STATISTICS"): 7718 self.raise_error("Expecting token STATISTICS") 7719 7720 if self._match_text_seq("NOSCAN"): 7721 this = "NOSCAN" 7722 elif self._match(TokenType.FOR): 7723 if self._match_text_seq("ALL", "COLUMNS"): 7724 this = "FOR ALL COLUMNS" 7725 if self._match_texts("COLUMNS"): 7726 this = "FOR COLUMNS" 7727 expressions = self._parse_csv(self._parse_column_reference) 7728 elif self._match_text_seq("SAMPLE"): 7729 sample = self._parse_number() 7730 expressions = [ 7731 self.expression( 7732 exp.AnalyzeSample, 7733 sample=sample, 7734 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7735 ) 7736 ] 7737 7738 return self.expression( 7739 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7740 ) 7741 7742 # 
https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7743 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7744 kind = None 7745 this = None 7746 expression: t.Optional[exp.Expression] = None 7747 if self._match_text_seq("REF", "UPDATE"): 7748 kind = "REF" 7749 this = "UPDATE" 7750 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7751 this = "UPDATE SET DANGLING TO NULL" 7752 elif self._match_text_seq("STRUCTURE"): 7753 kind = "STRUCTURE" 7754 if self._match_text_seq("CASCADE", "FAST"): 7755 this = "CASCADE FAST" 7756 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7757 ("ONLINE", "OFFLINE") 7758 ): 7759 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7760 expression = self._parse_into() 7761 7762 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7763 7764 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7765 this = self._prev.text.upper() 7766 if self._match_text_seq("COLUMNS"): 7767 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7768 return None 7769 7770 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7771 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7772 if self._match_text_seq("STATISTICS"): 7773 return self.expression(exp.AnalyzeDelete, kind=kind) 7774 return None 7775 7776 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7777 if self._match_text_seq("CHAINED", "ROWS"): 7778 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7779 return None 7780 7781 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7782 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7783 this = self._prev.text.upper() 7784 expression: t.Optional[exp.Expression] = None 7785 expressions = [] 7786 update_options = None 7787 7788 if self._match_text_seq("HISTOGRAM", "ON"): 7789 expressions = self._parse_csv(self._parse_column_reference) 7790 with_expressions = [] 7791 while self._match(TokenType.WITH): 7792 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7793 if self._match_texts(("SYNC", "ASYNC")): 7794 if self._match_text_seq("MODE", advance=False): 7795 with_expressions.append(f"{self._prev.text.upper()} MODE") 7796 self._advance() 7797 else: 7798 buckets = self._parse_number() 7799 if self._match_text_seq("BUCKETS"): 7800 with_expressions.append(f"{buckets} BUCKETS") 7801 if with_expressions: 7802 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7803 7804 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7805 TokenType.UPDATE, advance=False 7806 ): 7807 update_options = self._prev.text.upper() 7808 self._advance() 7809 elif self._match_text_seq("USING", "DATA"): 7810 expression = self.expression(exp.UsingData, this=self._parse_string()) 7811 7812 return self.expression( 7813 exp.AnalyzeHistogram, 7814 this=this, 7815 expressions=expressions, 7816 expression=expression, 7817 update_options=update_options, 7818 ) 7819 7820 def _parse_merge(self) -> exp.Merge: 7821 self._match(TokenType.INTO) 7822 target = self._parse_table() 7823 7824 if target and self._match(TokenType.ALIAS, advance=False): 7825 target.set("alias", self._parse_table_alias()) 7826 7827 self._match(TokenType.USING) 7828 using = self._parse_table() 7829 7830 self._match(TokenType.ON) 7831 on = self._parse_assignment() 7832 7833 return self.expression( 7834 exp.Merge, 7835 this=target, 
7836 using=using, 7837 on=on, 7838 whens=self._parse_when_matched(), 7839 returning=self._parse_returning(), 7840 ) 7841 7842 def _parse_when_matched(self) -> exp.Whens: 7843 whens = [] 7844 7845 while self._match(TokenType.WHEN): 7846 matched = not self._match(TokenType.NOT) 7847 self._match_text_seq("MATCHED") 7848 source = ( 7849 False 7850 if self._match_text_seq("BY", "TARGET") 7851 else self._match_text_seq("BY", "SOURCE") 7852 ) 7853 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7854 7855 self._match(TokenType.THEN) 7856 7857 if self._match(TokenType.INSERT): 7858 this = self._parse_star() 7859 if this: 7860 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7861 else: 7862 then = self.expression( 7863 exp.Insert, 7864 this=exp.var("ROW") 7865 if self._match_text_seq("ROW") 7866 else self._parse_value(values=False), 7867 expression=self._match_text_seq("VALUES") and self._parse_value(), 7868 ) 7869 elif self._match(TokenType.UPDATE): 7870 expressions = self._parse_star() 7871 if expressions: 7872 then = self.expression(exp.Update, expressions=expressions) 7873 else: 7874 then = self.expression( 7875 exp.Update, 7876 expressions=self._match(TokenType.SET) 7877 and self._parse_csv(self._parse_equality), 7878 ) 7879 elif self._match(TokenType.DELETE): 7880 then = self.expression(exp.Var, this=self._prev.text) 7881 else: 7882 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7883 7884 whens.append( 7885 self.expression( 7886 exp.When, 7887 matched=matched, 7888 source=source, 7889 condition=condition, 7890 then=then, 7891 ) 7892 ) 7893 return self.expression(exp.Whens, expressions=whens) 7894 7895 def _parse_show(self) -> t.Optional[exp.Expression]: 7896 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7897 if parser: 7898 return parser(self) 7899 return self._parse_as_command(self._prev) 7900 7901 def _parse_set_item_assignment( 7902 self, kind: t.Optional[str] = None 7903 ) -> t.Optional[exp.Expression]: 7904 index = self._index 7905 7906 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7907 return self._parse_set_transaction(global_=kind == "GLOBAL") 7908 7909 left = self._parse_primary() or self._parse_column() 7910 assignment_delimiter = self._match_texts(("=", "TO")) 7911 7912 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7913 self._retreat(index) 7914 return None 7915 7916 right = self._parse_statement() or self._parse_id_var() 7917 if isinstance(right, (exp.Column, exp.Identifier)): 7918 right = exp.var(right.name) 7919 7920 this = self.expression(exp.EQ, this=left, expression=right) 7921 return self.expression(exp.SetItem, this=this, kind=kind) 7922 7923 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7924 self._match_text_seq("TRANSACTION") 7925 characteristics = self._parse_csv( 7926 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7927 ) 7928 return self.expression( 7929 exp.SetItem, 7930 expressions=characteristics, 7931 kind="TRANSACTION", 7932 **{"global": global_}, # type: ignore 7933 ) 7934 7935 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7936 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7937 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7938 7939 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7940 index = self._index 7941 set_ = self.expression( 7942 exp.Set, 
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7943 ) 7944 7945 if self._curr: 7946 self._retreat(index) 7947 return self._parse_as_command(self._prev) 7948 7949 return set_ 7950 7951 def _parse_var_from_options( 7952 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7953 ) -> t.Optional[exp.Var]: 7954 start = self._curr 7955 if not start: 7956 return None 7957 7958 option = start.text.upper() 7959 continuations = options.get(option) 7960 7961 index = self._index 7962 self._advance() 7963 for keywords in continuations or []: 7964 if isinstance(keywords, str): 7965 keywords = (keywords,) 7966 7967 if self._match_text_seq(*keywords): 7968 option = f"{option} {' '.join(keywords)}" 7969 break 7970 else: 7971 if continuations or continuations is None: 7972 if raise_unmatched: 7973 self.raise_error(f"Unknown option {option}") 7974 7975 self._retreat(index) 7976 return None 7977 7978 return exp.var(option) 7979 7980 def _parse_as_command(self, start: Token) -> exp.Command: 7981 while self._curr: 7982 self._advance() 7983 text = self._find_sql(start, self._prev) 7984 size = len(start.text) 7985 self._warn_unsupported() 7986 return exp.Command(this=text[:size], expression=text[size:]) 7987 7988 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7989 settings = [] 7990 7991 self._match_l_paren() 7992 kind = self._parse_id_var() 7993 7994 if self._match(TokenType.L_PAREN): 7995 while True: 7996 key = self._parse_id_var() 7997 value = self._parse_primary() 7998 if not key and value is None: 7999 break 8000 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8001 self._match(TokenType.R_PAREN) 8002 8003 self._match_r_paren() 8004 8005 return self.expression( 8006 exp.DictProperty, 8007 this=this, 8008 kind=kind.this if kind else None, 8009 settings=settings, 8010 ) 8011 8012 def _parse_dict_range(self, this: str) -> exp.DictRange: 8013 self._match_l_paren() 8014 has_min = self._match_text_seq("MIN") 8015 if has_min: 8016 min = self._parse_var() or self._parse_primary() 8017 self._match_text_seq("MAX") 8018 max = self._parse_var() or self._parse_primary() 8019 else: 8020 max = self._parse_var() or self._parse_primary() 8021 min = exp.Literal.number(0) 8022 self._match_r_paren() 8023 return self.expression(exp.DictRange, this=this, min=min, max=max) 8024 8025 def _parse_comprehension( 8026 self, this: t.Optional[exp.Expression] 8027 ) -> t.Optional[exp.Comprehension]: 8028 index = self._index 8029 expression = self._parse_column() 8030 if not self._match(TokenType.IN): 8031 self._retreat(index - 1) 8032 return None 8033 iterator = self._parse_column() 8034 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8035 return self.expression( 8036 exp.Comprehension, 8037 this=this, 8038 expression=expression, 8039 iterator=iterator, 8040 condition=condition, 8041 ) 8042 8043 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8044 if self._match(TokenType.HEREDOC_STRING): 8045 return self.expression(exp.Heredoc, this=self._prev.text) 8046 8047 if not self._match_text_seq("$"): 8048 return None 8049 8050 tags = ["$"] 8051 tag_text = None 8052 8053 if self._is_connected(): 8054 self._advance() 8055 tags.append(self._prev.text.upper()) 8056 else: 8057 self.raise_error("No closing $ found") 8058 8059 if tags[-1] != "$": 8060 if self._is_connected() and self._match_text_seq("$"): 8061 tag_text = tags[-1] 8062 tags.append("$") 8063 else: 8064 self.raise_error("No closing $ found") 8065 8066 heredoc_start = self._curr 8067 
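        # Scan forward until the closing tag sequence, e.g. (illustrative, Postgres-style dollar quoting)
        #   $$ SELECT 1 $$   or   $tag$ ... $tag$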
8068 while self._curr: 8069 if self._match_text_seq(*tags, advance=False): 8070 this = self._find_sql(heredoc_start, self._prev) 8071 self._advance(len(tags)) 8072 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8073 8074 self._advance() 8075 8076 self.raise_error(f"No closing {''.join(tags)} found") 8077 return None 8078 8079 def _find_parser( 8080 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8081 ) -> t.Optional[t.Callable]: 8082 if not self._curr: 8083 return None 8084 8085 index = self._index 8086 this = [] 8087 while True: 8088 # The current token might be multiple words 8089 curr = self._curr.text.upper() 8090 key = curr.split(" ") 8091 this.append(curr) 8092 8093 self._advance() 8094 result, trie = in_trie(trie, key) 8095 if result == TrieResult.FAILED: 8096 break 8097 8098 if result == TrieResult.EXISTS: 8099 subparser = parsers[" ".join(this)] 8100 return subparser 8101 8102 self._retreat(index) 8103 return None 8104 8105 def _match(self, token_type, advance=True, expression=None): 8106 if not self._curr: 8107 return None 8108 8109 if self._curr.token_type == token_type: 8110 if advance: 8111 self._advance() 8112 self._add_comments(expression) 8113 return True 8114 8115 return None 8116 8117 def _match_set(self, types, advance=True): 8118 if not self._curr: 8119 return None 8120 8121 if self._curr.token_type in types: 8122 if advance: 8123 self._advance() 8124 return True 8125 8126 return None 8127 8128 def _match_pair(self, token_type_a, token_type_b, advance=True): 8129 if not self._curr or not self._next: 8130 return None 8131 8132 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8133 if advance: 8134 self._advance(2) 8135 return True 8136 8137 return None 8138 8139 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8140 if not self._match(TokenType.L_PAREN, expression=expression): 8141 self.raise_error("Expecting (") 8142 8143 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8144 if not self._match(TokenType.R_PAREN, expression=expression): 8145 self.raise_error("Expecting )") 8146 8147 def _match_texts(self, texts, advance=True): 8148 if ( 8149 self._curr 8150 and self._curr.token_type != TokenType.STRING 8151 and self._curr.text.upper() in texts 8152 ): 8153 if advance: 8154 self._advance() 8155 return True 8156 return None 8157 8158 def _match_text_seq(self, *texts, advance=True): 8159 index = self._index 8160 for text in texts: 8161 if ( 8162 self._curr 8163 and self._curr.token_type != TokenType.STRING 8164 and self._curr.text.upper() == text 8165 ): 8166 self._advance() 8167 else: 8168 self._retreat(index) 8169 return None 8170 8171 if not advance: 8172 self._retreat(index) 8173 8174 return True 8175 8176 def _replace_lambda( 8177 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8178 ) -> t.Optional[exp.Expression]: 8179 if not node: 8180 return node 8181 8182 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8183 8184 for column in node.find_all(exp.Column): 8185 typ = lambda_types.get(column.parts[0].name) 8186 if typ is not None: 8187 dot_or_id = column.to_dot() if column.table else column.this 8188 8189 if typ: 8190 dot_or_id = self.expression( 8191 exp.Cast, 8192 this=dot_or_id, 8193 to=typ, 8194 ) 8195 8196 parent = column.parent 8197 8198 while isinstance(parent, exp.Dot): 8199 if not isinstance(parent.parent, exp.Dot): 8200 parent.replace(dot_or_id) 8201 break 8202 parent = parent.parent 8203 else: 
8204 if column is node: 8205 node = dot_or_id 8206 else: 8207 column.replace(dot_or_id) 8208 return node 8209 8210 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8211 start = self._prev 8212 8213 # Not to be confused with TRUNCATE(number, decimals) function call 8214 if self._match(TokenType.L_PAREN): 8215 self._retreat(self._index - 2) 8216 return self._parse_function() 8217 8218 # Clickhouse supports TRUNCATE DATABASE as well 8219 is_database = self._match(TokenType.DATABASE) 8220 8221 self._match(TokenType.TABLE) 8222 8223 exists = self._parse_exists(not_=False) 8224 8225 expressions = self._parse_csv( 8226 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8227 ) 8228 8229 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8230 8231 if self._match_text_seq("RESTART", "IDENTITY"): 8232 identity = "RESTART" 8233 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8234 identity = "CONTINUE" 8235 else: 8236 identity = None 8237 8238 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8239 option = self._prev.text 8240 else: 8241 option = None 8242 8243 partition = self._parse_partition() 8244 8245 # Fallback case 8246 if self._curr: 8247 return self._parse_as_command(start) 8248 8249 return self.expression( 8250 exp.TruncateTable, 8251 expressions=expressions, 8252 is_database=is_database, 8253 exists=exists, 8254 cluster=cluster, 8255 identity=identity, 8256 option=option, 8257 partition=partition, 8258 ) 8259 8260 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8261 this = self._parse_ordered(self._parse_opclass) 8262 8263 if not self._match(TokenType.WITH): 8264 return this 8265 8266 op = self._parse_var(any_token=True) 8267 8268 return self.expression(exp.WithOperator, this=this, op=op) 8269 8270 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8271 self._match(TokenType.EQ) 8272 self._match(TokenType.L_PAREN) 8273 8274 opts: t.List[t.Optional[exp.Expression]] = [] 8275 option: exp.Expression | None 8276 while self._curr and not self._match(TokenType.R_PAREN): 8277 if self._match_text_seq("FORMAT_NAME", "="): 8278 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8279 option = self._parse_format_name() 8280 else: 8281 option = self._parse_property() 8282 8283 if option is None: 8284 self.raise_error("Unable to parse option") 8285 break 8286 8287 opts.append(option) 8288 8289 return opts 8290 8291 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8292 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8293 8294 options = [] 8295 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8296 option = self._parse_var(any_token=True) 8297 prev = self._prev.text.upper() 8298 8299 # Different dialects might separate options and values by white space, "=" and "AS" 8300 self._match(TokenType.EQ) 8301 self._match(TokenType.ALIAS) 8302 8303 param = self.expression(exp.CopyParameter, this=option) 8304 8305 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8306 TokenType.L_PAREN, advance=False 8307 ): 8308 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8309 param.set("expressions", self._parse_wrapped_options()) 8310 elif prev == "FILE_FORMAT": 8311 # T-SQL's external file format case 8312 param.set("expression", self._parse_field()) 8313 else: 8314 param.set("expression", self._parse_unquoted_field()) 8315 8316 options.append(param) 8317 self._match(sep) 8318 8319 return options 
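    # Illustrative input that reaches _parse_copy_parameters / _parse_credentials via
    # _parse_copy below (a sketch, assuming Snowflake-style syntax):
    #   COPY INTO t FROM @stage FILE_FORMAT = (TYPE = CSV) ON_ERROR = CONTINUE
    # where FILE_FORMAT is a wrapped option list and ON_ERROR is a plain key/value parameter.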
8320 8321 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8322 expr = self.expression(exp.Credentials) 8323 8324 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8325 expr.set("storage", self._parse_field()) 8326 if self._match_text_seq("CREDENTIALS"): 8327 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8328 creds = ( 8329 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8330 ) 8331 expr.set("credentials", creds) 8332 if self._match_text_seq("ENCRYPTION"): 8333 expr.set("encryption", self._parse_wrapped_options()) 8334 if self._match_text_seq("IAM_ROLE"): 8335 expr.set("iam_role", self._parse_field()) 8336 if self._match_text_seq("REGION"): 8337 expr.set("region", self._parse_field()) 8338 8339 return expr 8340 8341 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8342 return self._parse_field() 8343 8344 def _parse_copy(self) -> exp.Copy | exp.Command: 8345 start = self._prev 8346 8347 self._match(TokenType.INTO) 8348 8349 this = ( 8350 self._parse_select(nested=True, parse_subquery_alias=False) 8351 if self._match(TokenType.L_PAREN, advance=False) 8352 else self._parse_table(schema=True) 8353 ) 8354 8355 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8356 8357 files = self._parse_csv(self._parse_file_location) 8358 credentials = self._parse_credentials() 8359 8360 self._match_text_seq("WITH") 8361 8362 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8363 8364 # Fallback case 8365 if self._curr: 8366 return self._parse_as_command(start) 8367 8368 return self.expression( 8369 exp.Copy, 8370 this=this, 8371 kind=kind, 8372 credentials=credentials, 8373 files=files, 8374 params=params, 8375 ) 8376 8377 def _parse_normalize(self) -> exp.Normalize: 8378 return self.expression( 8379 exp.Normalize, 8380 this=self._parse_bitwise(), 8381 form=self._match(TokenType.COMMA) and self._parse_var(), 8382 ) 8383 8384 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8385 args = self._parse_csv(lambda: self._parse_lambda()) 8386 8387 this = seq_get(args, 0) 8388 decimals = seq_get(args, 1) 8389 8390 return expr_type( 8391 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8392 ) 8393 8394 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8395 star_token = self._prev 8396 8397 if self._match_text_seq("COLUMNS", "(", advance=False): 8398 this = self._parse_function() 8399 if isinstance(this, exp.Columns): 8400 this.set("unpack", True) 8401 return this 8402 8403 return self.expression( 8404 exp.Star, 8405 **{ # type: ignore 8406 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8407 "replace": self._parse_star_op("REPLACE"), 8408 "rename": self._parse_star_op("RENAME"), 8409 }, 8410 ).update_positions(star_token) 8411 8412 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8413 privilege_parts = [] 8414 8415 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8416 # (end of privilege list) or L_PAREN (start of column list) are met 8417 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8418 privilege_parts.append(self._curr.text.upper()) 8419 self._advance() 8420 8421 this = exp.var(" ".join(privilege_parts)) 8422 expressions = ( 8423 self._parse_wrapped_csv(self._parse_column) 8424 if self._match(TokenType.L_PAREN, advance=False) 8425 else None 8426 ) 8427 8428 return self.expression(exp.GrantPrivilege, this=this, 
expressions=expressions) 8429 8430 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8431 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8432 principal = self._parse_id_var() 8433 8434 if not principal: 8435 return None 8436 8437 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8438 8439 def _parse_grant_revoke_common( 8440 self, 8441 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8442 privileges = self._parse_csv(self._parse_grant_privilege) 8443 8444 self._match(TokenType.ON) 8445 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8446 8447 # Attempt to parse the securable e.g. MySQL allows names 8448 # such as "foo.*", "*.*" which are not easily parseable yet 8449 securable = self._try_parse(self._parse_table_parts) 8450 8451 return privileges, kind, securable 8452 8453 def _parse_grant(self) -> exp.Grant | exp.Command: 8454 start = self._prev 8455 8456 privileges, kind, securable = self._parse_grant_revoke_common() 8457 8458 if not securable or not self._match_text_seq("TO"): 8459 return self._parse_as_command(start) 8460 8461 principals = self._parse_csv(self._parse_grant_principal) 8462 8463 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8464 8465 if self._curr: 8466 return self._parse_as_command(start) 8467 8468 return self.expression( 8469 exp.Grant, 8470 privileges=privileges, 8471 kind=kind, 8472 securable=securable, 8473 principals=principals, 8474 grant_option=grant_option, 8475 ) 8476 8477 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8478 start = self._prev 8479 8480 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8481 8482 privileges, kind, securable = self._parse_grant_revoke_common() 8483 8484 if not securable or not self._match_text_seq("FROM"): 8485 return self._parse_as_command(start) 8486 8487 principals = self._parse_csv(self._parse_grant_principal) 8488 8489 cascade = None 8490 if self._match_texts(("CASCADE", "RESTRICT")): 8491 cascade = self._prev.text.upper() 8492 8493 if self._curr: 8494 return self._parse_as_command(start) 8495 8496 return self.expression( 8497 exp.Revoke, 8498 privileges=privileges, 8499 kind=kind, 8500 securable=securable, 8501 principals=principals, 8502 grant_option=grant_option, 8503 cascade=cascade, 8504 ) 8505 8506 def _parse_overlay(self) -> exp.Overlay: 8507 return self.expression( 8508 exp.Overlay, 8509 **{ # type: ignore 8510 "this": self._parse_bitwise(), 8511 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8512 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8513 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8514 }, 8515 ) 8516 8517 def _parse_format_name(self) -> exp.Property: 8518 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8519 # for FILE_FORMAT = <format_name> 8520 return self.expression( 8521 exp.Property, 8522 this=exp.var("FORMAT_NAME"), 8523 value=self._parse_string() or self._parse_table_parts(), 8524 ) 8525 8526 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8527 args: t.List[exp.Expression] = [] 8528 8529 if self._match(TokenType.DISTINCT): 8530 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8531 self._match(TokenType.COMMA) 8532 8533 args.extend(self._parse_csv(self._parse_assignment)) 8534 8535 return self.expression( 8536 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8537 
) 8538 8539 def _identifier_expression( 8540 self, token: t.Optional[Token] = None, **kwargs: t.Any 8541 ) -> exp.Identifier: 8542 token = token or self._prev 8543 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8544 expression.update_positions(token) 8545 return expression 8546 8547 def _build_pipe_cte( 8548 self, 8549 query: exp.Query, 8550 expressions: t.List[exp.Expression], 8551 alias_cte: t.Optional[exp.TableAlias] = None, 8552 ) -> exp.Select: 8553 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8554 if alias_cte: 8555 new_cte = alias_cte 8556 else: 8557 self._pipe_cte_counter += 1 8558 new_cte = f"__tmp{self._pipe_cte_counter}" 8559 8560 with_ = query.args.get("with") 8561 ctes = with_.pop() if with_ else None 8562 8563 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8564 if ctes: 8565 new_select.set("with", ctes) 8566 8567 return new_select.with_(new_cte, as_=query, copy=False) 8568 8569 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8570 select = self._parse_select(consume_pipe=False) 8571 if not select: 8572 return query 8573 8574 return self._build_pipe_cte( 8575 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8576 ) 8577 8578 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8579 limit = self._parse_limit() 8580 offset = self._parse_offset() 8581 if limit: 8582 curr_limit = query.args.get("limit", limit) 8583 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8584 query.limit(limit, copy=False) 8585 if offset: 8586 curr_offset = query.args.get("offset") 8587 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8588 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8589 8590 return query 8591 8592 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8593 this = self._parse_assignment() 8594 if self._match_text_seq("GROUP", "AND", advance=False): 8595 return this 8596 8597 this = self._parse_alias(this) 8598 8599 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8600 return self._parse_ordered(lambda: this) 8601 8602 return this 8603 8604 def _parse_pipe_syntax_aggregate_group_order_by( 8605 self, query: exp.Select, group_by_exists: bool = True 8606 ) -> exp.Select: 8607 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8608 aggregates_or_groups, orders = [], [] 8609 for element in expr: 8610 if isinstance(element, exp.Ordered): 8611 this = element.this 8612 if isinstance(this, exp.Alias): 8613 element.set("this", this.args["alias"]) 8614 orders.append(element) 8615 else: 8616 this = element 8617 aggregates_or_groups.append(this) 8618 8619 if group_by_exists: 8620 query.select(*aggregates_or_groups, copy=False).group_by( 8621 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8622 copy=False, 8623 ) 8624 else: 8625 query.select(*aggregates_or_groups, append=False, copy=False) 8626 8627 if orders: 8628 return query.order_by(*orders, append=False, copy=False) 8629 8630 return query 8631 8632 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8633 self._match_text_seq("AGGREGATE") 8634 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8635 8636 if self._match(TokenType.GROUP_BY) or ( 8637 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8638 ): 8639 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8640 8641 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8642 8643 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8644 first_setop = self.parse_set_operation(this=query) 8645 if not first_setop: 8646 return None 8647 8648 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8649 expr = self._parse_paren() 8650 return expr.assert_is(exp.Subquery).unnest() if expr else None 8651 8652 first_setop.this.pop() 8653 8654 setops = [ 8655 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8656 *self._parse_csv(_parse_and_unwrap_query), 8657 ] 8658 8659 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8660 with_ = query.args.get("with") 8661 ctes = with_.pop() if with_ else None 8662 8663 if isinstance(first_setop, exp.Union): 8664 query = query.union(*setops, copy=False, **first_setop.args) 8665 elif isinstance(first_setop, exp.Except): 8666 query = query.except_(*setops, copy=False, **first_setop.args) 8667 else: 8668 query = query.intersect(*setops, copy=False, **first_setop.args) 8669 8670 query.set("with", ctes) 8671 8672 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8673 8674 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8675 join = self._parse_join() 8676 if not join: 8677 return None 8678 8679 if isinstance(query, exp.Select): 8680 return query.join(join, copy=False) 8681 8682 return query 8683 8684 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8685 pivots = self._parse_pivots() 8686 if not pivots: 8687 return query 8688 8689 from_ = query.args.get("from") 8690 if from_: 8691 from_.this.set("pivots", pivots) 8692 8693 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8694 8695 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8696 self._match_text_seq("EXTEND") 8697 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8698 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8699 8700 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8701 sample = self._parse_table_sample() 8702 8703 with_ = query.args.get("with") 8704 if with_: 8705 with_.expressions[-1].this.set("sample", sample) 8706 else: 8707 query.set("sample", sample) 8708 8709 return query 8710 8711 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8712 if isinstance(query, exp.Subquery): 8713 query = exp.select("*").from_(query, copy=False) 8714 8715 if not query.args.get("from"): 8716 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8717 8718 while self._match(TokenType.PIPE_GT): 8719 start = self._curr 8720 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8721 if not parser: 8722 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8723 # keywords, making it tricky to disambiguate them without lookahead. The approach 8724 # here is to try and parse a set operation and if that fails, then try to parse a 8725 # join operator. If that fails as well, then the operator is not supported. 
8726 parsed_query = self._parse_pipe_syntax_set_operator(query) 8727 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8728 if not parsed_query: 8729 self._retreat(start) 8730 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8731 break 8732 query = parsed_query 8733 else: 8734 query = parser(self, query) 8735 8736 return query 8737 8738 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8739 vars = self._parse_csv(self._parse_id_var) 8740 if not vars: 8741 return None 8742 8743 return self.expression( 8744 exp.DeclareItem, 8745 this=vars, 8746 kind=self._parse_types(), 8747 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8748 ) 8749 8750 def _parse_declare(self) -> exp.Declare | exp.Command: 8751 start = self._prev 8752 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8753 8754 if not expressions or self._curr: 8755 return self._parse_as_command(start) 8756 8757 return self.expression(exp.Declare, expressions=expressions) 8758 8759 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8760 exp_class = exp.Cast if strict else exp.TryCast 8761 8762 if exp_class == exp.TryCast: 8763 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8764 8765 return self.expression(exp_class, **kwargs) 8766 8767 def _parse_json_value(self) -> exp.JSONValue: 8768 this = self._parse_bitwise() 8769 self._match(TokenType.COMMA) 8770 path = self._parse_bitwise() 8771 8772 returning = self._match(TokenType.RETURNING) and self._parse_type() 8773 8774 return self.expression( 8775 exp.JSONValue, 8776 this=this, 8777 path=self.dialect.to_json_path(path), 8778 returning=returning, 8779 on_condition=self._parse_on_condition(), 8780 ) 8781 8782 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8783 def concat_exprs( 8784 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8785 ) -> exp.Expression: 8786 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8787 concat_exprs = [ 8788 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8789 ] 8790 node.set("expressions", concat_exprs) 8791 return node 8792 if len(exprs) == 1: 8793 return exprs[0] 8794 return self.expression(exp.Concat, expressions=args, safe=True) 8795 8796 args = self._parse_csv(self._parse_lambda) 8797 8798 if args: 8799 order = args[-1] if isinstance(args[-1], exp.Order) else None 8800 8801 if order: 8802 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8803 # remove 'expr' from exp.Order and add it back to args 8804 args[-1] = order.this 8805 order.set("this", concat_exprs(order.this, args)) 8806 8807 this = order or concat_exprs(args[0], args) 8808 else: 8809 this = None 8810 8811 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8812 8813 return self.expression(exp.GroupConcat, this=this, separator=separator)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The dialect used to parse the input; resolved via Dialect.get_or_raise, so the base dialect is used when omitted. Default: None
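A minimal usage sketch (the SQL string and error settings are illustrative): construct a Parser with the desired error handling, then feed it tokens produced by a tokenizer.

    import sqlglot
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 errors and raise them together once parsing finishes,
    # instead of raising on the first one (the ErrorLevel.IMMEDIATE default).
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)

    sql = "SELECT a FROM t"
    tokens = sqlglot.tokenize(sql)           # tokens for the default dialect
    expressions = parser.parse(tokens, sql)  # one syntax tree per statement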
1577 def __init__( 1578 self, 1579 error_level: t.Optional[ErrorLevel] = None, 1580 error_message_context: int = 100, 1581 max_errors: int = 3, 1582 dialect: DialectType = None, 1583 ): 1584 from sqlglot.dialects import Dialect 1585 1586 self.error_level = error_level or ErrorLevel.IMMEDIATE 1587 self.error_message_context = error_message_context 1588 self.max_errors = max_errors 1589 self.dialect = Dialect.get_or_raise(dialect) 1590 self.reset()
1603 def parse( 1604 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1605 ) -> t.List[t.Optional[exp.Expression]]: 1606 """ 1607 Parses a list of tokens and returns a list of syntax trees, one tree 1608 per parsed SQL statement. 1609 1610 Args: 1611 raw_tokens: The list of tokens. 1612 sql: The original SQL string, used to produce helpful debug messages. 1613 1614 Returns: 1615 The list of the produced syntax trees. 1616 """ 1617 return self._parse( 1618 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1619 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
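As a sketch of the typical flow (the dialect name is chosen arbitrarily), the token list usually comes from the same dialect's tokenizer, and each top-level statement yields its own tree:

    from sqlglot.dialects import Dialect

    dialect = Dialect.get_or_raise("duckdb")
    sql = "SELECT 1; SELECT 2"

    trees = dialect.parser().parse(dialect.tokenize(sql), sql)
    assert len(trees) == 2  # one tree per statement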
1621 def parse_into( 1622 self, 1623 expression_types: exp.IntoType, 1624 raw_tokens: t.List[Token], 1625 sql: t.Optional[str] = None, 1626 ) -> t.List[t.Optional[exp.Expression]]: 1627 """ 1628 Parses a list of tokens into a given Expression type. If a collection of Expression 1629 types is given instead, this method will try to parse the token list into each one 1630 of them, stopping at the first for which the parsing succeeds. 1631 1632 Args: 1633 expression_types: The expression type(s) to try and parse the token list into. 1634 raw_tokens: The list of tokens. 1635 sql: The original SQL string, used to produce helpful debug messages. 1636 1637 Returns: 1638 The target Expression. 1639 """ 1640 errors = [] 1641 for expression_type in ensure_list(expression_types): 1642 parser = self.EXPRESSION_PARSERS.get(expression_type) 1643 if not parser: 1644 raise TypeError(f"No parser registered for {expression_type}") 1645 1646 try: 1647 return self._parse(parser, raw_tokens, sql) 1648 except ParseError as e: 1649 e.errors[0]["into_expression"] = expression_type 1650 errors.append(e) 1651 1652 raise ParseError( 1653 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1654 errors=merge_errors(errors), 1655 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
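A short sketch, assuming exp.Select is among the expression types registered in EXPRESSION_PARSERS:

    from sqlglot import exp, tokenize
    from sqlglot.parser import Parser

    sql = "SELECT x FROM t WHERE x > 1"
    parser = Parser()

    # Like parse(), parse_into() returns one entry per statement; here the
    # single statement is parsed specifically as a SELECT.
    select = parser.parse_into(exp.Select, tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)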
1695 def check_errors(self) -> None: 1696 """Logs or raises any found errors, depending on the chosen error level setting.""" 1697 if self.error_level == ErrorLevel.WARN: 1698 for error in self.errors: 1699 logger.error(str(error)) 1700 elif self.error_level == ErrorLevel.RAISE and self.errors: 1701 raise ParseError( 1702 concat_messages(self.errors, self.max_errors), 1703 errors=merge_errors(self.errors), 1704 )
Logs or raises any found errors, depending on the chosen error level setting.
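For instance (a sketch; the deliberately incomplete expression is just a convenient way to record an error), with ErrorLevel.WARN the recorded errors are emitted through the "sqlglot" logger instead of being raised:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.WARN)

    # exp.Cast is missing its mandatory "to" argument, so an error is recorded.
    parser.validate_expression(exp.Cast(this=exp.column("x")))
    parser.check_errors()  # logs the recorded error rather than raising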
1706 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1707 """ 1708 Appends an error in the list of recorded errors or raises it, depending on the chosen 1709 error level setting. 1710 """ 1711 token = token or self._curr or self._prev or Token.string("") 1712 start = token.start 1713 end = token.end + 1 1714 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1715 highlight = self.sql[start:end] 1716 end_context = self.sql[end : end + self.error_message_context] 1717 1718 error = ParseError.new( 1719 f"{message}. Line {token.line}, Col: {token.col}.\n" 1720 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1721 description=message, 1722 line=token.line, 1723 col=token.col, 1724 start_context=start_context, 1725 highlight=highlight, 1726 end_context=end_context, 1727 ) 1728 1729 if self.error_level == ErrorLevel.IMMEDIATE: 1730 raise error 1731 1732 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
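raise_error is mainly useful when extending the parser, e.g. in a dialect-specific parsing method. A hypothetical sketch (MyParser and _parse_widget are illustrative names, not part of sqlglot):

    import typing as t

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        # Hypothetical helper: report a positioned error when the expected
        # keyword is not present at the current token.
        def _parse_widget(self) -> t.Optional[exp.Expression]:
            if not self._match_text_seq("WIDGET"):
                self.raise_error("Expected WIDGET keyword")
            return self._parse_id_var()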
1734 def expression( 1735 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1736 ) -> E: 1737 """ 1738 Creates a new, validated Expression. 1739 1740 Args: 1741 exp_class: The expression class to instantiate. 1742 comments: An optional list of comments to attach to the expression. 1743 kwargs: The arguments to set for the expression along with their respective values. 1744 1745 Returns: 1746 The target expression. 1747 """ 1748 instance = exp_class(**kwargs) 1749 instance.add_comments(comments) if comments else self._add_comments(instance) 1750 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
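A small sketch of building a node directly; this helper is the same one the _parse_* methods above rely on:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()

    # Builds `a = 1` as an exp.EQ node; expression() also attaches any pending
    # comments and runs validate_expression() on the result.
    node = parser.expression(
        exp.EQ,
        this=exp.column("a"),
        expression=exp.Literal.number(1),
    )
    print(node.sql())  # a = 1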
1757 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1758 """ 1759 Validates an Expression, making sure that all its mandatory arguments are set. 1760 1761 Args: 1762 expression: The expression to validate. 1763 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1764 1765 Returns: 1766 The validated expression. 1767 """ 1768 if self.error_level != ErrorLevel.IGNORE: 1769 for error_message in expression.error_messages(args): 1770 self.raise_error(error_message) 1771 1772 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
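A sketch of how the error level interacts with validation, assuming exp.Like declares both of its operands as mandatory:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    incomplete = exp.Like(this=exp.column("x"))  # missing the right-hand side

    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(incomplete)
    except ParseError as e:
        print(e)

    # With ErrorLevel.IGNORE, validation is skipped and the node is returned as-is.
    node = Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)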
4842 def parse_set_operation( 4843 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4844 ) -> t.Optional[exp.Expression]: 4845 start = self._index 4846 _, side_token, kind_token = self._parse_join_parts() 4847 4848 side = side_token.text if side_token else None 4849 kind = kind_token.text if kind_token else None 4850 4851 if not self._match_set(self.SET_OPERATIONS): 4852 self._retreat(start) 4853 return None 4854 4855 token_type = self._prev.token_type 4856 4857 if token_type == TokenType.UNION: 4858 operation: t.Type[exp.SetOperation] = exp.Union 4859 elif token_type == TokenType.EXCEPT: 4860 operation = exp.Except 4861 else: 4862 operation = exp.Intersect 4863 4864 comments = self._prev.comments 4865 4866 if self._match(TokenType.DISTINCT): 4867 distinct: t.Optional[bool] = True 4868 elif self._match(TokenType.ALL): 4869 distinct = False 4870 else: 4871 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4872 if distinct is None: 4873 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4874 4875 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4876 "STRICT", "CORRESPONDING" 4877 ) 4878 if self._match_text_seq("CORRESPONDING"): 4879 by_name = True 4880 if not side and not kind: 4881 kind = "INNER" 4882 4883 on_column_list = None 4884 if by_name and self._match_texts(("ON", "BY")): 4885 on_column_list = self._parse_wrapped_csv(self._parse_column) 4886 4887 expression = self._parse_select( 4888 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4889 ) 4890 4891 return self.expression( 4892 operation, 4893 comments=comments, 4894 this=this, 4895 distinct=distinct, 4896 by_name=by_name, 4897 expression=expression, 4898 side=side, 4899 kind=kind, 4900 on=on_column_list, 4901 )